Index: vendor/clang/dist/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- vendor/clang/dist/lib/CodeGen/CGOpenMPRuntime.cpp (revision 294603) +++ vendor/clang/dist/lib/CodeGen/CGOpenMPRuntime.cpp (revision 294604) @@ -1,4258 +1,4260 @@ //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This provides a class for OpenMP runtime code generation. // //===----------------------------------------------------------------------===// #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace CodeGen; namespace { /// \brief Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: /// \brief Kinds of OpenMP regions used in codegen. enum CGOpenMPRegionKind { /// \brief Region with outlined function for standalone 'parallel' /// directive. ParallelOutlinedRegion, /// \brief Region with outlined function for standalone 'task' directive. TaskOutlinedRegion, /// \brief Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, /// \brief Region with outlined function for standalone 'target' directive. 
TargetRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, const CGOpenMPRegionKind RegionKind, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. virtual const VarDecl *getThreadIDVariable() const = 0; /// \brief Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; /// \brief Get an LValue for the current ThreadID variable. /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } bool hasCancel() const { return HasCancel; } static bool classof(const CGCapturedStmtInfo *Info) { return Info->getKind() == CR_OpenMP; } protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; OpenMPDirectiveKind Kind; bool HasCancel; }; /// \brief API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, HasCancel), ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. 
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == ParallelOutlinedRegion; } private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; }; /// \brief API for captured statement code generation in OpenMP constructs. class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } /// \brief Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == TaskOutlinedRegion; } private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; }; /// \brief API for inlined captured statement code generation in OpenMP /// constructs. 
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} // \brief Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } void setContextValue(llvm::Value *V) override { if (OuterRegionInfo) { OuterRegionInfo->setContextValue(V); return; } llvm_unreachable("No context value for inlined OpenMP region"); } /// \brief Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) return OuterRegionInfo->lookup(VD); // If there is no outer outlined region,no need to lookup in a list of // captured variables, we can use the original one. return nullptr; } FieldDecl *getThisFieldDecl() const override { if (OuterRegionInfo) return OuterRegionInfo->getThisFieldDecl(); return nullptr; } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { if (OuterRegionInfo) return OuterRegionInfo->getThreadIDVariable(); return nullptr; } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { if (auto *OuterRegionInfo = getOldCSI()) return OuterRegionInfo->getHelperName(); llvm_unreachable("No helper name for inlined OpenMP construct"); } CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == InlinedRegion; } private: /// \brief CodeGen info about outer OpenMP region. 
CodeGenFunction::CGCapturedStmtInfo *OldCSI; CGOpenMPRegionInfo *OuterRegionInfo; }; /// \brief API for captured statement code generation in OpenMP target /// constructs. For this captures, implicit parameters are used instead of the /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, /*HasCancel=*/false), HelperName(HelperName) {} /// \brief This is unused for target regions because each starts executing /// with a single thread. const VarDecl *getThreadIDVariable() const override { return nullptr; } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == TargetRegion; } private: StringRef HelperName; }; /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; public: /// \brief Constructs region for combined constructs. /// \param CodeGen Code generation sequence for combined directives. Includes /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGF(CGF) { // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); } ~InlinedOpenMPRegionRAII() { // Restore original CapturedStmtInfo only if we're done with code emission. 
auto *OldCSI = cast(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; } }; } // anonymous namespace static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, QualType Ty) { AlignmentSource Source; CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), Ty->getPointeeType(), Source); } LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return emitLoadOfPointerLValue(CGF, CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { if (!CGF.HaveInsertPoint()) return; // 1.2.2 OpenMP Language Terminology // Structured block - An executable statement with a single entry at the // top and a single exit at the bottom. // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. CGF.EHStack.pushTerminate(); { CodeGenFunction::RunCleanupsScope Scope(CGF); CodeGen(CGF); } CGF.EHStack.popTerminate(); } LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType(), AlignmentSource::Decl); } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), OffloadEntriesInfoManager(CGM) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int8PtrTy /* psource */, nullptr); // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); } void CGOpenMPRuntime::clear() { InternalVars.clear(); } // Layout information for ident_t. static CharUnits getIdentAlign(CodeGenModule &CGM) { return CGM.getPointerAlign(); } static CharUnits getIdentSize(CodeGenModule &CGM) { assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); return CharUnits::fromQuantity(16) + CGM.getPointerSize(); } static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { // All the fields except the last are i32, so this works beautifully. return unsigned(Field) * CharUnits::fromQuantity(4); } static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, CGOpenMPRuntime::IdentFieldIndex Field, const llvm::Twine &Name = "") { auto Offset = getOffsetOfIdentField(Field); return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); const CapturedStmt *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); bool HasCancel = false; if (auto *OPD = dyn_cast(&D)) HasCancel = OPD->hasCancel(); else if (auto *OPSD = dyn_cast(&D)) HasCancel = OPSD->hasCancel(); else if (auto *OPFD = dyn_cast(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, 
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, cast(D).hasCancel()); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateCapturedStmtFunction(*CS); } Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { CharUnits Align = getIdentAlign(CGM); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { if (!DefaultOpenMPPSource) { // Initialize default location for psource field of ident_t structure of // all ident_t objects. Format is ";file;function;line;column;;". // Taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } auto DefaultOpenMPLocation = new llvm::GlobalVariable( CGM.getModule(), IdentTy, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); DefaultOpenMPLocation->setUnnamedAddr(true); DefaultOpenMPLocation->setAlignment(Align.getQuantity()); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); llvm::Constant *Values[] = {Zero, llvm::ConstantInt::get(CGM.Int32Ty, Flags), Zero, Zero, DefaultOpenMPPSource}; llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); DefaultOpenMPLocation->setInitializer(Init); OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; } return Address(Entry, Align); } llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) { // If no debug info is generated - return global default location. 
if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || Loc.isInvalid()) return getOrCreateDefaultLocation(Flags).getPointer(); assert(CGF.CurFn && "No function in current CodeGenFunction."); Address LocValue = Address::invalid(); auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if // GetOpenMPThreadID was called before this routine. if (!LocValue.isValid()) { // Generate "ident_t .kmpc_loc.addr;" Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), ".kmpc_loc.addr"); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), CGM.getSize(getIdentSize(CGF.CGM))); } // char **psource = &.kmpc_loc_.addr.psource; Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); if (OMPDebugLoc == nullptr) { SmallString<128> Buffer2; llvm::raw_svector_ostream OS2(Buffer2); // Build debug location PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); OS2 << ";" << PLoc.getFilename() << ";"; if (const FunctionDecl *FD = dyn_cast_or_null(CGF.CurFuncDecl)) { OS2 << FD->getQualifiedNameAsString(); } OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; } // *psource = ";;;;;;"; CGF.Builder.CreateStore(OMPDebugLoc, PSource); // Our callers always pass this to a runtime function, so for // convenience, go ahead and return a naked pointer. 
return LocValue.getPointer(); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, SourceLocation Loc) { assert(CGF.CurFn && "No function in current CodeGenFunction."); llvm::Value *ThreadID = nullptr; // Check whether we've already cached a load of the thread id in this // function. auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) { ThreadID = I->second.ThreadID; if (ThreadID != nullptr) return ThreadID; } if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getThreadIDVariable()) { // Check if this an outlined function with thread id passed as argument. auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); // If value loaded in entry block, cache it and use it everywhere in // function. if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } return ThreadID; } } // This is not an outlined function region - need to call __kmpc_int32 // kmpc_global_thread_num(ident_t *loc). // Generate thread id value and cache this value for use across the // function. 
CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); ThreadID = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; return ThreadID; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } llvm::Constant * CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { llvm::Constant *RTLFn = nullptr; switch (Function) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; } case OMPRTL__kmpc_global_thread_num: { // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } case OMPRTL__kmpc_threadprivate_cached: { // Build void *__kmpc_threadprivate_cached(ident_t *loc, // kmp_int32 global_tid, void *data, size_t size, void ***cache); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.SizeTy, CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); break; } case OMPRTL__kmpc_critical: { // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); break; } case OMPRTL__kmpc_critical_with_hint: { // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit, uintptr_t hint); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy), CGM.IntPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); break; } case OMPRTL__kmpc_threadprivate_register: { // Build void __kmpc_threadprivate_register(ident_t *, void *data, // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); // typedef void *(*kmpc_ctor)(void *); auto KmpcCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg*/ false)->getPointerTo(); // typedef void *(*kmpc_cctor)(void *, void *); llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto KmpcCopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, /*isVarArg*/ false)->getPointerTo(); // typedef void (*kmpc_dtor)(void *); auto KmpcDtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) ->getPointerTo(); llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, KmpcCopyCtorTy, KmpcDtorTy}; auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); break; } case OMPRTL__kmpc_end_critical: { // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 
global_tid, // kmp_critical_name *crit); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); break; } case OMPRTL__kmpc_cancel_barrier: { // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); break; } case OMPRTL__kmpc_barrier: { // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; } case OMPRTL__kmpc_for_static_fini: { // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); break; } case OMPRTL__kmpc_push_num_threads: { // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); break; } case OMPRTL__kmpc_serialized_parallel: { // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); break; } case OMPRTL__kmpc_end_serialized_parallel: { // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; } case OMPRTL__kmpc_flush: { // Build void __kmpc_flush(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); break; } case OMPRTL__kmpc_master: { // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); break; } case OMPRTL__kmpc_end_master: { // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); break; } case OMPRTL__kmpc_omp_taskyield: { // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, // int end_part); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); break; } case OMPRTL__kmpc_single: { // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); break; } case OMPRTL__kmpc_end_single: { // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); break; } case OMPRTL__kmpc_omp_task_alloc: { // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); assert(KmpRoutineEntryPtrTy != nullptr && "Type kmp_routine_entry_t must be created."); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; // Return void * and then cast to particular kmp_task_t type. 
llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); break; } case OMPRTL__kmpc_omp_task: { // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); break; } case OMPRTL__kmpc_copyprivate: { // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), // kmp_int32 didit); llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *CpyFnTy = llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, CpyFnTy->getPointerTo(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); break; } case OMPRTL__kmpc_reduce: { // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); break; } case OMPRTL__kmpc_reduce_nowait: { // 
Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name // *lck); llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); break; } case OMPRTL__kmpc_end_reduce: { // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); break; } case OMPRTL__kmpc_end_reduce_nowait: { // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); break; } case OMPRTL__kmpc_omp_task_begin_if0: { // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); break; } case 
OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty,           CGM.VoidPtrTy,
                                CGM.Int32Ty,           CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): the prototype above says 'void', but the function type
    // built below uses CGM.Int32Ty as its return type - confirm which one the
    // offloading runtime actually declares.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): same 'void' vs. CGM.Int32Ty return-type mismatch as for
    // __tgt_register_lib - confirm against the runtime.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  }
  return RTLFn;
}

/// \brief Emit a value of type CGF.SizeTy holding the size of \a Ty in chars.
///
/// If the AST reports a non-zero size, the result is a plain constant.
/// Otherwise \a Ty is treated as a variable-length array: each VLA dimension is
/// peeled off with getVLASize() and the dimensions are multiplied together,
/// then multiplied by the (constant) size of the remaining element type.
static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
  auto &C = CGF.getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    // Each iteration strips one VLA dimension; Ty is rebound to the element
    // type returned by getVLASize().
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
      Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    // NOTE(review): if the size was zero but Ty was not a VLA, the loop never
    // runs and Size is still null here; only the assert below guards that.
    SizeInChars = C.getTypeSizeInChars(Ty);
    assert(!SizeInChars.isZero());
    Size = CGF.Builder.CreateNUWMul(
        Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
  } else
    Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
  return Size;
}

/// \brief Declare the __kmpc_for_static_init_{4,4u,8,8u} runtime function.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned Selects the signed ("_4"/"_8") or unsigned ("_4u"/"_8u")
/// entry point.
/// \return The runtime function obtained from CGM.CreateRuntimeFunction().
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime function.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned Selects the signed or unsigned ("u"-suffixed) entry point.
/// \return The runtime function obtained from CGM.CreateRuntimeFunction().
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime function.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned Selects the signed or unsigned ("u"-suffixed) entry point.
/// \return The runtime function obtained from CGM.CreateRuntimeFunction().
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime function.
///
/// Unlike the init/fini variants, the declared function returns a 32-bit
/// integer and takes the bounds and stride by pointer.
///
/// \param IVSize Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned Selects the signed or unsigned ("u"-suffixed) entry point.
/// \return The runtime function obtained from CGM.CreateRuntimeFunction().
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Get (or lazily create) the internal cache variable used for the
/// threadprivate variable \a VD.
///
/// The variable has type CGM.Int8PtrPtrTy and is named after the mangled name
/// of \a VD with a ".cache." suffix. Must only be called when native TLS is
/// not in use (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// \brief Return the address of the current thread's copy of the
/// threadprivate variable \a VD.
///
/// When OpenMPUseTLS is set and the target supports TLS, \a VDAddr is returned
/// unchanged. Otherwise a call to the OMPRTL__kmpc_threadprivate_cached
/// runtime function is emitted and its result is wrapped in an Address that
/// reuses the alignment of \a VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  // Arguments: location, thread id, original variable as i8*, its store size
  // and the per-variable cache.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// \brief Emit the runtime calls that register the constructor, copy
/// constructor and destructor of a threadprivate variable.
///
/// \param CGF Function in which the calls are emitted.
/// \param VDAddr Address of the original variable.
/// \param Ctor, CopyCtor, Dtor Values passed through to
/// __kmpc_threadprivate_register.
/// \param Loc Source location used to build the ident_t argument.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emit the helper functions needed to initialize and destroy the
/// per-thread copies of the threadprivate variable \a VD, and register them
/// with the runtime.
///
/// Nothing is emitted when native TLS is used, when \a VD has no definition,
/// when \a VD was already handled (tracked in ThreadPrivateWithDefinition), or
/// when neither a constructor nor a destructor helper is required.
///
/// \param VD Threadprivate variable.
/// \param VDAddr Address of the original variable.
/// \param Loc Location of the declaration.
/// \param PerformInit If true (and compiling C++), a ".__kmpc_global_ctor_."
/// helper that re-emits the initializer is generated.
/// \param CGF If non-null, the registration calls are emitted directly into
/// this function and nullptr is returned. If null, a standalone
/// ".__omp_threadprivate_init_." function is created and returned.
/// \return The standalone init function, or nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    // Remember the variable so that the helpers are emitted only once.
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies that an initializer exists - confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // Load the void* argument and view it as a pointer to the variable type.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary init function and hand it back to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
///
/// When \a Cond constant-folds, only the selected arm is emitted and no
/// branch is generated.
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const RegionCodeGenTy &ThenGen,
                            const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    if (CondConstant) {
      ThenGen(CGF);
    } else {
      ElseGen(CGF);
    }
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  {
    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
    ThenGen(CGF);
  }
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  {
    // This cleanup scope guards the 'else' arm even though the variable is
    // also called ThenScope.
    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
    ElseGen(CGF);
  }
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emit the call that runs the outlined parallel region
/// \a OutlinedFn.
///
/// Without an 'if' clause, or when \a IfCond holds, the region is started
/// through the OMPRTL__kmpc_fork_call runtime function. When \a IfCond is
/// false, \a OutlinedFn is called directly between the
/// OMPRTL__kmpc_serialized_parallel and OMPRTL__kmpc_end_serialized_parallel
/// runtime calls.
///
/// NOTE(review): the parameter type 'ArrayRef' and the 'llvm::SmallVector'
/// locals below carry no template arguments in this text; it looks like
/// angle-bracketed tokens were lost in extraction - confirm against upstream.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [this, OutlinedFn, CapturedVars,
                    RTLoc](CodeGenFunction &CGF) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
    llvm::SmallVector RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF) {
    auto ThreadID = getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero, captured vars...);
    auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
    Address ZeroAddr =
      CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                           /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenGen(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc) { if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); auto ThreadID = getThreadID(CGF, Loc); auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); return ThreadIDTemp; } llvm::Constant * CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; auto RuntimeName = Out.str(); auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; if (Elem.second) { assert(Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested"); return &*Elem.second; } return Elem.second = new llvm::GlobalVariable( CGM.getModule(), Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), Elem.first()); } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { llvm::Twine Name(".gomp_critical_user_", CriticalName); return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); } namespace { template class CallEndCleanup final : public EHScopeStack::Cleanup { llvm::Value *Callee; llvm::Value *Args[N]; public: CallEndCleanup(llvm::Value *Callee, ArrayRef CleanupArgs) : Callee(Callee) { assert(CleanupArgs.size() == N); std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); } void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { if (!CGF.HaveInsertPoint()) return; CGF.EmitRuntimeCall(Callee, Args); } }; } // anonymous namespace void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy 
&CriticalOpGen, SourceLocation Loc, const Expr *Hint) { // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); // CriticalOpGen(); // __kmpc_end_critical(ident_t *, gtid, Lock); // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; if (Hint) { llvm::SmallVector ArgsWithHint(std::begin(Args), std::end(Args)); auto *HintVal = CGF.EmitScalarExpr(Hint); ArgsWithHint.push_back( CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), ArgsWithHint); } else CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); // Build a call to __kmpc_end_critical CGF.EHStack.pushCleanup::value>>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), llvm::makeArrayRef(Args)); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, OpenMPDirectiveKind Kind, SourceLocation Loc, const RegionCodeGenTy &BodyOpGen) { llvm::Value *CallBool = CGF.EmitScalarConversion( IfCond, CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), CGF.getContext().BoolTy, Loc); auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); auto *ContBlock = CGF.createBasicBlock("omp_if.end"); // Generate the branch (If-stmt) CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); CGF.EmitBlock(ThenBlock); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); // Emit the rest of bblocks/branches CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // if(__kmpc_master(ident_t *, gtid)) { // MasterOpGen(); // 
__kmpc_end_master(ident_t *, gtid); // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; auto *IsMaster = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); typedef CallEndCleanup::value> MasterCallEndCleanup; emitIfStmt( CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { CodeGenFunction::RunCleanupsScope Scope(CGF); CGF.EHStack.pushCleanup( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), llvm::makeArrayRef(Args)); MasterOpGen(CGF); }); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_omp_taskyield(loc, thread_id, 0); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // __kmpc_taskgroup(ident_t *, gtid); // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup { CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); // Build a call to __kmpc_end_taskgroup CGF.EHStack.pushCleanup::value>>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), llvm::makeArrayRef(Args)); emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } } /// Given an array of pointers to variables, project the address of a /// given variable. static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) { // Pull out the pointer to the variable. 
Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); Addr = CGF.Builder.CreateElementBitCast( Addr, CGF.ConvertTypeForMem(Var->getType())); return Addr; } static llvm::Value *emitCopyprivateCopyFunction( CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef CopyprivateVars, ArrayRef DestExprs, ArrayRef SrcExprs, ArrayRef AssignmentOps) { auto &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); FunctionType::ExtInfo EI; auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( C.VoidTy, Args, EI, /*isVariadic=*/false); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.copyprivate.copy_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), CGF.getPointerAlign()); Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), CGF.getPointerAlign()); // *(Type0*)Dst[0] = *(Type0*)Src[0]; // *(Type1*)Dst[1] = *(Type1*)Src[1]; // ... 
// *(Typen*)Dst[n] = *(Typen*)Src[n]; for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { auto DestVar = cast(cast(DestExprs[I])->getDecl()); Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); auto SrcVar = cast(cast(SrcExprs[I])->getDecl()); Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); auto *VD = cast(CopyprivateVars[I])->getDecl(); QualType Type = VD->getType(); CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); } CGF.FinishFunction(); return Fn; } void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef CopyprivateVars, ArrayRef SrcExprs, ArrayRef DstExprs, ArrayRef AssignmentOps) { if (!CGF.HaveInsertPoint()) return; assert(CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()); auto &C = CGM.getContext(); // int32 did_it = 0; // if(__kmpc_single(ident_t *, gtid)) { // SingleOpGen(); // __kmpc_end_single(ident_t *, gtid); // did_it = 1; // } // call __kmpc_copyprivate(ident_t *, gtid, , , // , did_it); Address DidIt = Address::invalid(); if (!CopyprivateVars.empty()) { // int32 did_it = 0; auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; auto *IsSingle = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); typedef CallEndCleanup::value> SingleCallEndCleanup; emitIfStmt( CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { CodeGenFunction::RunCleanupsScope Scope(CGF); CGF.EHStack.pushCleanup( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), llvm::makeArrayRef(Args)); SingleOpGen(CGF); if (DidIt.isValid()) { // did_it 
= 1; CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); } }); // call __kmpc_copyprivate(ident_t *, gtid, , , // , did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); auto CopyprivateArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { Address Elem = CGF.Builder.CreateConstArrayGEP( CopyprivateList, I, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other // threads in the corresponding parallel region. auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); auto *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t * getThreadID(CGF, Loc), // i32 BufSize, // size_t CL.getPointer(), // void * CpyFn, // void (*) (void *, void *) DidItVal // i32 did_it }; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); } } void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) { if (!CGF.HaveInsertPoint()) return; // __kmpc_ordered(ident_t *, gtid); // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, 
Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
    // Build a call to __kmpc_end_ordered
    CGF.EHStack.pushCleanup::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
        llvm::makeArrayRef(Args));
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

// NOTE(review): several explicit template-argument lists appear to be missing
// from this text (e.g. `pushCleanup::value>>(` above, `static_cast(` and
// `dyn_cast_or_null(` below). The tokens are kept exactly as found - confirm
// against the upstream file before compiling.

/// \brief Emit a barrier runtime call at \p Loc.
///
/// The location flags are chosen from \p Kind. If the enclosing OpenMP region
/// reports hasCancel() and \p ForceSimpleCall is false, __kmpc_cancel_barrier
/// is called instead of __kmpc_barrier; with \p EmitChecks a non-null result
/// then branches to the cancel destination of the region. Nothing is emitted
/// when there is no insert point or when the enclosing region's directive kind
/// is OMPD_single.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
  // Tag the location with the kind of construct the barrier belongs to.
  if (Kind == OMPD_for) {
    Flags = static_cast(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
  } else if (Kind == OMPD_sections) {
    Flags = static_cast(Flags | OMP_IDENT_BARRIER_IMPL_SECTIONS);
  } else if (Kind == OMPD_single) {
    Flags = static_cast(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
  } else if (Kind == OMPD_barrier) {
    Flags = static_cast(Flags | OMP_IDENT_BARRIER_EXPL);
  } else {
    Flags = static_cast(Flags | OMP_IDENT_BARRIER_IMPL);
  }
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo);
  // Do not emit barrier call in the single directive emitted in some rare cases
  // for sections directives.
  if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier, so stop here.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule: alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
};

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
///
/// \p Chunked only selects between the two static variants; for dynamic and
/// guided the *_chunked enumerator is always returned. An unknown schedule
/// kind maps to the static (non-chunked) schedule and asserts that no chunk
/// was given.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Return true if the unordered runtime schedule selected by
/// \p ScheduleKind and \p Chunked is OMP_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Return true if the unordered, non-chunked runtime schedule selected
/// by \p ScheduleKind is anything other than OMP_sch_static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Emit the dispatch-init runtime call for a loop using the runtime
/// schedule derived from \p ScheduleKind, \p Chunk and \p Ordered. Does
/// nothing when there is no insert point.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPScheduleClauseKind ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  // Static schedules are only expected here for ordered loops.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      CGF.Builder.getIntN(IVSize, 0), // Lower
      UB,                             // Upper
      CGF.Builder.getIntN(IVSize, 1), // Stride
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the static-init runtime call for a statically scheduled loop.
///
/// \p IL, \p LB, \p UB and \p ST are passed to the runtime by address; a null
/// \p Chunk is replaced by the constant 1. Asserts that \p Ordered is false
/// and that the resulting schedule is one of the static kinds. Does nothing
/// when there is no insert point.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      IL.getPointer(),                // &isLastIter
      LB.getPointer(),                // &LB
      UB.getPointer(),                // &UB
      ST.getPointer(),                // &Stride
      CGF.Builder.getIntN(IVSize, 1), // Incr
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the call that finishes a statically scheduled loop. Does
/// nothing when there is no insert point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// \brief Emit the dispatch-fini runtime call for the given induction
/// variable size and signedness. Does nothing when there is no insert point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the dispatch-next runtime call and return its result converted
/// from a signed 32-bit integer to bool.
///
/// NOTE(review): unlike the neighbouring emitters this one does not check
/// CGF.HaveInsertPoint() - presumably intentional; confirm.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
      CGF.getContext().BoolTy, Loc);
}

/// \brief Emit a call pushing the 'num_threads' value, cast to a signed
/// 32-bit integer. Does nothing when there is no insert point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// \brief Emit a call pushing the 'proc_bind' policy, translated to the
/// constant the runtime accepts. Does nothing when there is no insert point;
/// OMPC_PROC_BIND_unknown is treated as unreachable.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// \brief Emit a flush call at \p Loc. The unnamed array parameter is not
/// used. Does nothing when there is no insert point.
///
/// NOTE(review): the element type of the `ArrayRef` parameter appears to be
/// missing from this text; the tokens are kept as found.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace

/// \brief Return true if no target region entries are recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  // FIXME: Add other entries type when they become supported.
  return OffloadEntriesTargetRegion.empty();
}

/// \brief Initialize target region entry.
///
/// Stores an entry with the given \p Order and null address/ID under the
/// (DeviceID, FileID, ParentName, LineNum, ColNum) key and increments the
/// entry counter. Asserts that the compilation is for the device.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned ColNum, unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
  ++OffloadingEntriesNum;
}

/// \brief Record the address and ID of a target region entry.
///
/// In device mode the entry must already exist (see the assertions) and only
/// its address and ID are filled in. Otherwise a new entry is created whose
/// order is the current entry count, which is then incremented.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  unsigned ColNum, llvm::Constant *Addr,
                                  llvm::Constant *ID) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                    ColNum) &&
           "Entry must exist.");
    auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
                                            [LineNum][ColNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    return;
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
        Entry;
  }
}

/// \brief Return true if an entry exists for the given key and has neither an
/// address nor an ID set yet; false if any level of the lookup misses or the
/// entry is already registered.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    unsigned ColNum) const {
  // Walk the nested maps one level at a time so that nothing is inserted.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  auto PerColumn = PerLine->second.find(ColNum);
  if (PerColumn == PerLine->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerColumn->second.getAddress() || PerColumn->second.getID())
    return false;
  return true;
}

/// \brief Invoke \p Action once for every recorded target region entry,
/// passing the five key components followed by the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (auto &D : OffloadEntriesTargetRegion)
    for (auto &F : D.second)
      for (auto &P : F.second)
        for (auto &L : P.second)
          for (auto &C : L.second)
            Action(D.first, F.first, P.first(), L.first, C.first, C.second);
}

/// \brief Create a Ctor/Dtor-like function whose body is emitted through
/// \a Codegen. This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                         const RegionCodeGenTy &Codegen) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // The generated function takes a single void* parameter.
  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
                             /*Id=*/nullptr, C.VoidPtrTy);
  Args.push_back(&DummyPtr);

  CodeGenFunction CGF(CGM);
  // NOTE(review): this statement builds and discards a temporary GlobalDecl;
  // it looks like it has no effect - confirm before removing.
  GlobalDecl();
  auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, FunctionType::ExtInfo(),
      /*isVariadic=*/false);
  auto FTy = CGM.getTypes().GetFunctionType(FI);
  auto *Fn =
      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
  Codegen(CGF);
  CGF.FinishFunction();
  return Fn;
}

/// \brief Build the offloading binary descriptor and return the function that
/// registers it, or nullptr when compiling for the device or when there are no
/// offload entries.
///
/// The descriptor references one device image per entry of OMPTargetTriples
/// plus the begin/end symbols of the host entries table. The returned
/// registration function also arranges for the unregistration function to run
/// as a global destructor.
///
/// NOTE(review): the template arguments of `cast(` and `llvm::SmallVector`
/// appear to be missing from this text; the tokens are kept as found.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
      ".omp_offloading.entries_end");

  // Create all device images
  llvm::SmallVector DeviceImagesEntires;
  auto *DeviceImageTy = cast(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));

  for (unsigned i = 0; i < Devices.size(); ++i) {
    // The image start/end symbols are named after the device triple.
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T));

    llvm::Constant *Dev =
        llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
                                  HostEntriesBegin, HostEntriesEnd, nullptr);
    DeviceImagesEntires.push_back(Dev);
  }

  // Create device images global array.
  llvm::ArrayType *DeviceImagesInitTy =
      llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
  llvm::Constant *DeviceImagesInit =
      llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);

  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
      M, DeviceImagesInitTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
      ".omp_offloading.device_images");
  DeviceImages->setUnnamedAddr(true);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  auto *BinaryDescriptorTy = cast(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
      BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
                                           Index),
      HostEntriesBegin, HostEntriesEnd, nullptr);

  auto *Desc = new llvm::GlobalVariable(
      M, BinaryDescriptorTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
      ".omp_offloading.descriptor");

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        // Unregistration is hooked up as the destructor of RegUnregVar.
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}

/// \brief Emit a global offload entry {Addr, Name, Size} placed in the
/// ".omp_offloading.entries" section with 1-byte alignment.
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
                                         uint64_t Size) {
  auto *TgtOffloadEntryType = cast(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);

  // Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(true);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // Create the entry struct.
  llvm::Constant *EntryInit = llvm::ConstantStruct::get(
      TgtOffloadEntryType, AddrPtr, StrPtr,
      llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
  llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
      M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
      EntryInit, ".omp_offloading.entry");

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  Entry->setAlignment(1);
  return;
}

/// \brief Emit the "omp_offload.info" named metadata and one offload entry
/// per recorded target region, in the order the entries were created. Does
/// nothing when no entries are recorded.
///
/// NOTE(review): the template arguments of `SmallVector` appear to be missing
/// from this text; the tokens are kept as found.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order.
  SmallVector OrderedEntries(OffloadEntriesInfoManager.size());

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Auxiliary methods to create metadata values and strings.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      unsigned Column,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Column in the file where the entry was identified.
    // - Entry 6 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(Column));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Emit the entries themselves in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE = dyn_cast(
            E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
///
/// Returns silently when not compiling for the device, when no host IR file
/// is configured, when the file cannot be read or parsed, or when it carries
/// no "omp_offload.info" metadata.
///
/// NOTE(review): the template arguments of the `cast(` calls appear to be
/// missing from this text; the tokens are kept as found.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  // The host module is parsed into its own, function-local context.
  llvm::LLVMContext C;
  auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast(I);

    // Read operand Idx of the current node as an unsigned integer.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast(MN->getOperand(Idx));
      return cast(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
      break;
    }
  }
}

/// \brief Build (once) the kmp_routine_entry_t pointer type and cache both
/// its QualType and its converted LLVM type.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append an unnamed, public, non-mutable field of type \p FieldTy to
/// \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// \brief Return the (lazily built and cached) __tgt_offload_entry type.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    RD->completeDefinition();
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

/// \brief Return the (lazily built and cached) __tgt_device_image type.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// \brief Return the (lazily built and cached) __tgt_bin_desc type.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// \brief The original variable, its private copy and (possibly null) the
/// per-element initializer variable of one private/firstprivate item.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// NOTE(review): the template arguments of `std::pair` appear to be missing
// from this text; the tokens are kept as found.
typedef std::pair PrivateDataTy;
} // anonymous namespace

/// \brief Build the record holding one field per entry of \p Privates, typed
/// as the non-reference type of the original variable; returns nullptr when
/// \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      // Copy the attributes selected by the iterator below from the variable
      // to the new field.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// \brief Build the implicit record kmp_task_t; the field order matches the
/// commented struct below.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_routine_entry_t destructors;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  RD->completeDefinition();
  return RD;
}

/// \brief Build kmp_task_t_with_privates: a kmp_task_t field followed by a
/// privates record field, the latter only when \p Privates is non-empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
///
/// NOTE(review): the template arguments of the `cast(` calls in the functions
/// below appear to be missing from this text; the tokens are kept as found.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of the wrapper record is the kmp_task_t itself.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the second field and exists only if the task has
  // privates; otherwise a null pointer is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// \brief Emit the internal function ".omp_task_destructor." which pushes a
/// destroy cleanup for every field of the task's privates record whose type
/// needs destruction.
///
/// NOTE(review): the second field of the wrapper record is dereferenced
/// unconditionally - presumably callers only use this when the task has
/// privates; confirm.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and walk its fields.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
///
/// NOTE(review): the `<ty1>` / `<tyn>` placeholders above were missing from
/// this text and are reconstructed from the surrounding pattern; the template
/// arguments of `ArrayRef`, `llvm::DenseMap` and `cast(` in the code are also
/// missing and are kept as found.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef PrivateVars,
                               ArrayRef FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the index of its out-parameter in Args;
  // index 0 is the privates pointer, so counting starts at 1.
  llvm::DenseMap PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E: PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast(cast(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast(cast(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  FunctionType::ExtInfo Info;
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
  auto *PrivatesQTyRD = cast(PrivatesQTy->getAsTagDecl());
  // Field i of the privates record corresponds to Privates[i].
  Counter = 0;
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal =
        emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// \brief Three-way comparator on the `first` member of two PrivateDataTy
/// values, with the sign inverted: returns 1 when \p P1 compares less than
/// \p P2.
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ?
-1 : 0); } void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, ArrayRef FirstprivateInits, ArrayRef> Dependences) { if (!CGF.HaveInsertPoint()) return; auto &C = CGM.getContext(); llvm::SmallVector Privates; // Aggregate privates and sort them by the alignment. auto I = PrivateCopies.begin(); for (auto *E : PrivateVars) { auto *VD = cast(cast(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast(cast(*I)->getDecl()), /*PrivateElemInit=*/nullptr))); ++I; } I = FirstprivateCopies.begin(); auto IElemInitRef = FirstprivateInits.begin(); for (auto *E : FirstprivateVars) { auto *VD = cast(cast(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast(cast(*I)->getDecl()), cast(cast(*IElemInitRef)->getDecl())))); ++I, ++IElemInitRef; } llvm::array_pod_sort(Privates.begin(), Privates.end(), array_pod_sort_comparator); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { KmpTaskTQTy = C.getRecordType( createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. 
auto *KmpTaskTWithPrivatesQTyRD = createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); QualType KmpTaskTWithPrivatesPtrQTy = C.getPointerType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). llvm::Value *TaskPrivatesMap = nullptr; auto *TaskPrivatesMapTy = std::next(cast(TaskFunction)->getArgumentList().begin(), 3) ->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { TaskPrivatesMap = llvm::ConstantPointerNull::get( cast(TaskPrivatesMapTy)); } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); // Task flags. Format is taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, // description of kmp_tasking_flags struct. const unsigned TiedFlag = 0x1; const unsigned FinalFlag = 0x2; unsigned Flags = Tied ? TiedFlag : 0; auto *TaskFlags = Final.getPointer() ? 
CGF.Builder.CreateSelect(Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskEntry, KmpRoutineEntryPtrTy)}; auto *NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( NewTask, KmpTaskTWithPrivatesPtrTy); LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskTWithPrivatesQTy); LValue TDBase = CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); // Fill the data in the resulting kmp_task_t record. // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar( CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), CGF.getNaturalTypeAlignment(SharedsTy)); CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). 
bool NeedsCleanup = false; if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); LValue SharedsBase; if (!FirstprivateVars.empty()) { SharedsBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } CodeGenFunction::CGCapturedStmtInfo CapturesInfo( cast(*D.getAssociatedStmt())); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; auto *Init = VD->getAnyInitializer(); LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); if (Init) { if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; auto *SharedField = CapturesInfo.lookup(OriginalVD); auto SharedRefLValue = CGF.EmitLValueForField(SharedsBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), AlignmentSource::Decl); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. if (!isa(Init) || CGF.isTrivialInitializer(Init)) { // Perform simple memcpy. CGF.EmitAggregateAssign(PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type); } else { // Initialize firstprivate array using element-by-element // intialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo]( Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate(Elem, [SrcElement]() -> Address { return SrcElement; }); (void)InitScope.Privatize(); // Emit initialization for single element. 
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( CGF, &CapturesInfo); CGF.EmitAnyExprToMem(Init, DestElement, Init->getType().getQualifiers(), /*IsInitializer=*/false); }); } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { return SharedRefLValue.getAddress(); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } } else { CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } } NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); ++FI; } } // Provide pointer to function with destructors for privates. llvm::Value *DestructorFn = NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy) : llvm::ConstantPointerNull::get( cast(KmpRoutineEntryPtrTy)); LValue Destructor = CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DestructorFn, KmpRoutineEntryPtrTy), Destructor); // Process list of dependences. Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. 
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; QualType FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); if (KmpDependInfoTy.isNull()) { KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); KmpDependInfoRD->startDefinition(); addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); } else { KmpDependInfoRD = cast(KmpDependInfoTy->getAsTagDecl()); } CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); for (unsigned i = 0; i < NumDependencies; ++i) { const Expr *E = Dependences[i].second; auto Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); if (auto *ASE = dyn_cast(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else Size = getTypeSize(CGF, Ty); auto Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); // deps[i].base_addr = &; auto BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 
CGF.EmitStoreOfScalar( CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(); auto LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = ; RTLDependenceKindTy DepKind; switch (Dependences[i].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; // Out and InOut dependencies must use the same code. case OMPC_DEPEND_out: case OMPC_DEPEND_inout: DepKind = DepInOut; break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } auto FlagsLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), FlagsLVal); } DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), CGF.VoidPtrTy); } // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. 
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence // list is not empty auto *ThreadID = getThreadID(CGF, Loc); auto *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; llvm::Value *DepTaskArgs[7]; if (NumDependencies) { DepTaskArgs[0] = UpLoc; DepTaskArgs[1] = ThreadID; DepTaskArgs[2] = NewTask; DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); DepTaskArgs[4] = DependenciesArray.getPointer(); DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ThenCodeGen = [this, NumDependencies, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { // TODO: add check for untied tasks. if (NumDependencies) { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } }; typedef CallEndCleanup::value> IfCallEndCleanup; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); DepWaitTaskArgs[3] = DependenciesArray.getPointer(); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
if (NumDependencies) CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); CGF.EHStack.pushCleanup( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), llvm::makeArrayRef(TaskArgs)); // Call proxy_task_entry(gtid, new_task); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); }; if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); } else { CodeGenFunction::RunCleanupsScope Scope(CGF); ThenCodeGen(CGF); } } /// \brief Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. /// \param LHSVar Variable on the left side of the reduction operation /// (references element of array in original variable). /// \param RHSVar Variable on the right side of the reduction operation /// (references element of array in original variable). /// \param RedOpGen Generator of reduction operation with use of LHSVar and /// RHSVar. static void EmitOMPAggregateReduction( CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref &RedOpGen, const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, const Expr *UpExpr = nullptr) { // Perform element-by-element initialization. QualType ElementTy; Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); // Drill down to the base element type on both arrays. 
auto ArrayTy = Type->getAsArrayTypeUnsafe(); auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); auto RHSBegin = RHSAddr.getPointer(); auto LHSBegin = LHSAddr.getPointer(); // Cast from pointer to array type to pointer to single element. auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); // The basic structure here is a while-do loop. auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); auto IsEmpty = CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. auto EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); RHSElementPHI->addIncoming(RHSBegin, EntryBB); Address RHSElementCurrent = Address(RHSElementPHI, RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); LHSElementPHI->addIncoming(LHSBegin, EntryBB); Address LHSElementCurrent = Address(LHSElementPHI, LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CodeGenFunction::OMPPrivateScope Scope(CGF); Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); Scope.Privatize(); RedOpGen(CGF, XExpr, EExpr, UpExpr); Scope.ForceCleanup(); // Shift the address forward by one element. auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. 
auto Done = CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); // Done. CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } static llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps) { auto &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); FunctionType::ExtInfo EI; auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( C.VoidTy, Args, EI, /*isVariadic=*/false); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.reduction.reduction_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), CGF.getPointerAlign()); Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), CGF.getPointerAlign()); // ... // *(Type*)lhs[i] = RedOp(*(Type*)lhs[i], *(Type*)rhs[i]); // ... 
CodeGenFunction::OMPPrivateScope Scope(CGF); auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { auto RHSVar = cast(cast(RHSExprs[I])->getDecl()); Scope.addPrivate(RHSVar, [&]() -> Address { return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); }); auto LHSVar = cast(cast(LHSExprs[I])->getDecl()); Scope.addPrivate(LHSVar, [&]() -> Address { return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); QualType PrivTy = (*IPriv)->getType(); if (PrivTy->isArrayType()) { // Get array size and emit VLA type. ++Idx; Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); CodeGenFunction::OpaqueValueMapping OpaqueMap( CGF, cast( CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()), RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); } } Scope.Privatize(); IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { CGF.EmitIgnoredExpr(E); }); } else // Emit reduction for array subscript or single variable. 
CGF.EmitIgnoredExpr(E); ++IPriv, ++ILHS, ++IRHS; } Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; } void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, bool WithNowait, bool SimpleReduction) { if (!CGF.HaveInsertPoint()) return; // Next code should be emitted for reduction: // // static kmp_critical_name lock = { 0 }; // // void reduce_func(void *lhs[], void *rhs[]) { // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); // ... // *(Type-1*)lhs[-1] = ReductionOperation-1(*(Type-1*)lhs[-1], // *(Type-1*)rhs[-1]); // } // // ... // void *RedList[] = {&[0], ..., &[-1]}; // switch (__kmpc_reduce{_nowait}(, , , sizeof(RedList), // RedList, reduce_func, &)) { // case 1: // ... // [i] = RedOp(*[i], *[i]); // ... // __kmpc_end_reduce{_nowait}(, , &); // break; // case 2: // ... // Atomic([i] = RedOp(*[i], *[i])); // ... // [__kmpc_end_reduce(, , &);] // break; // default:; // } // // if SimpleReduction is true, only the next code is generated: // ... // [i] = RedOp(*[i], *[i]); // ... auto &C = CGM.getContext(); if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { CGF.EmitIgnoredExpr(E); }); } else CGF.EmitIgnoredExpr(E); ++IPriv, ++ILHS, ++IRHS; } return; } // 1. Build a list of reduction variables. // void *RedList[] = {[0], ..., [-1]}; auto Size = RHSExprs.size(); for (auto *E : Privates) { if (E->getType()->isArrayType()) // Reserve place for array size. 
++Size; } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isArrayType()) { // Store array size. ++Idx; Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreateIntToPtr( CGF.Builder.CreateIntCast( CGF.getVLASize(CGF.getContext().getAsVariableArrayType( (*IPriv)->getType())) .first, CGF.SizeTy, /*isSigned=*/false), CGF.VoidPtrTy), Elem); } } // 2. Emit reduce_func(). auto *ReductionFn = emitReductionFunction( CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, LHSExprs, RHSExprs, ReductionOps); // 3. Create static kmp_critical_name lock = { 0 }; auto *Lock = getCriticalRegionLock(".reduction"); // 4. 
Build res = __kmpc_reduce{_nowait}(, , , sizeof(RedList), // RedList, reduce_func, &); auto *IdentTLoc = emitUpdateLocation( CGF, Loc, static_cast(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); auto *ThreadId = getThreadID(CGF, Loc); auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t * ThreadId, // i32 CGF.Builder.getInt32(RHSExprs.size()), // i32 ReductionArrayTySize, // size_type sizeof(RedList) RL, // void *RedList ReductionFn, // void (*) (void *, void *) Lock // kmp_critical_name *& }; auto Res = CGF.EmitRuntimeCall( createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait : OMPRTL__kmpc_reduce), Args); // 5. Build switch(res) auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); // 6. Build case 1: // ... // [i] = RedOp(*[i], *[i]); // ... // __kmpc_end_reduce{_nowait}(, , &); // break; auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); { CodeGenFunction::RunCleanupsScope Scope(CGF); // Add emission of __kmpc_end_reduce{_nowait}(, , &); llvm::Value *EndArgs[] = { IdentTLoc, // ident_t * ThreadId, // i32 Lock // kmp_critical_name *& }; CGF.EHStack .pushCleanup::value>>( NormalAndEHCleanup, createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait : OMPRTL__kmpc_end_reduce), llvm::makeArrayRef(EndArgs)); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. 
auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { CGF.EmitIgnoredExpr(E); }); } else // Emit reduction for array subscript or single variable. CGF.EmitIgnoredExpr(E); ++IPriv, ++ILHS, ++IRHS; } } CGF.EmitBranch(DefaultBB); // 7. Build case 2: // ... // Atomic([i] = RedOp(*[i], *[i])); // ... // break; auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); { CodeGenFunction::RunCleanupsScope Scope(CGF); if (!WithNowait) { // Add emission of __kmpc_end_reduce(, , &); llvm::Value *EndArgs[] = { IdentTLoc, // ident_t * ThreadId, // i32 Lock // kmp_critical_name *& }; CGF.EHStack .pushCleanup::value>>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_reduce), llvm::makeArrayRef(EndArgs)); } auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; const Expr *UpExpr = nullptr; BinaryOperatorKind BO = BO_Comma; if (auto *BO = dyn_cast(E)) { if (BO->getOpcode() == BO_Assign) { XExpr = BO->getLHS(); UpExpr = BO->getRHS(); } } // Try to emit update expression as a simple atomic. auto *RHSExpr = UpExpr; if (RHSExpr) { // Analyze RHS part of the whole expression. if (auto *ACO = dyn_cast( RHSExpr->IgnoreParenImpCasts())) { // If this is a conditional operator, analyze its condition for // min/max reduction operator. 
RHSExpr = ACO->getCond(); } if (auto *BORHS = dyn_cast(RHSExpr->IgnoreParenImpCasts())) { EExpr = BORHS->getRHS(); BO = BORHS->getOpcode(); } } if (XExpr) { auto *VD = cast(cast(*ILHS)->getDecl()); auto &&AtomicRedGen = [this, BO, VD, IPriv, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { LValue X = CGF.EmitLValue(XExpr); RValue E; if (EExpr) E = CGF.EmitAnyExpr(EExpr); CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv](RValue XRValue) { + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.EmitStoreThroughLValue( - XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType())); - return LHSTemp; - }); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); (void)PrivateScope.Privatize(); return CGF.EmitAnyExpr(UpExpr); }); }; if ((*IPriv)->getType()->isArrayType()) { // Emit atomic reduction for array section. auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, AtomicRedGen, XExpr, EExpr, UpExpr); } else // Emit atomic reduction for array subscript or single variable. AtomicRedGen(CGF, XExpr, EExpr, UpExpr); } else { // Emit as a critical region. 
auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitCriticalRegion( CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); }; if ((*IPriv)->getType()->isArrayType()) { auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, CritRedGen); } else CritRedGen(CGF, nullptr, nullptr, nullptr); } ++ILHS, ++IRHS, ++IPriv; } } CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnerKind, const RegionCodeGenTy &CodeGen, bool HasCancel) { if (!CGF.HaveInsertPoint()) return; InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } namespace { enum RTCancelKind { CancelNoreq = 0, CancelParallel = 1, CancelLoop = 2, CancelSections = 3, CancelTaskgroup = 4 }; } static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { RTCancelKind CancelKind = CancelNoreq; if (CancelRegion == OMPD_parallel) CancelKind = CancelParallel; else if (CancelRegion == OMPD_for) CancelKind = CancelLoop; else if (CancelRegion == OMPD_sections) CancelKind = CancelSections; else { assert(CancelRegion == OMPD_taskgroup); CancelKind = CancelTaskgroup; } return CancelKind; } void CGOpenMPRuntime::emitCancellationPointCall( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) { if (!CGF.HaveInsertPoint()) 
return; // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 // global_tid, kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getDirectiveKind() == OMPD_single) return; if (OMPRegionInfo->hasCancel()) { llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. auto *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); // if (__kmpc_cancellationpoint()) { // __kmpc_cancel_barrier(); // exit from construct; // } auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); auto *ContBB = CGF.createBasicBlock(".cancel.continue"); auto *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // __kmpc_cancel_barrier(); emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); } } } void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) { if (!CGF.HaveInsertPoint()) return; // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getDirectiveKind() == OMPD_single) return; auto &&ThenGen = [this, Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF) { llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. 
auto *Result = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // __kmpc_cancel_barrier(); // exit from construct; // } auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); auto *ContBB = CGF.createBasicBlock(".cancel.continue"); auto *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // __kmpc_cancel_barrier(); emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; /* With an if clause the cancel call is emitted only on the 'then' path; the 'else' path emits nothing. */ if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); else ThenGen(CGF); } } /// \brief Obtain information that uniquely identifies a target entry. This /// consists of the file and device IDs as well as line and column numbers /// associated with the relevant entry source location. All four values are /// returned through the reference parameters. static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum, unsigned &ColumnNum) { auto &SM = C.getSourceManager(); // The loc should always be valid and have a file ID (the user cannot use // #pragma directives in macros) assert(Loc.isValid() && "Source location is expected to be always valid."); assert(Loc.isFileID() && "Source location is expected to refer to a file."); PresumedLoc PLoc = SM.getPresumedLoc(Loc); assert(PLoc.isValid() && "Source location is expected to be always valid."); /* The device and file IDs come from the file system's unique ID of the presumed file name; failing to obtain it is treated as unreachable. */ llvm::sys::fs::UniqueID ID; if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) llvm_unreachable("Source file with target region no longer exists!"); DeviceID = ID.getDevice(); FileID = ID.getFile(); LineNum = PLoc.getLine(); ColumnNum = PLoc.getColumn(); return; } /* Outlines the target region of D into a function whose name is built from the source location and ParentName, and returns it through OutlinedFn. When IsOffloadEntry is true, OutlinedFnID is also set and the entry is registered with the offload entries manager. */ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool 
IsOffloadEntry) { assert(!ParentName.empty() && "Invalid target region parent name!"); const CapturedStmt &CS = *cast(D.getAssociatedStmt()); // Emit target region as a standalone region. auto &&CodeGen = [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS.getCapturedStmt()); }; // Create a unique name for the proxy/entry function using the source // location information of the current target region. The name will be // something like: // // .omp_offloading.DD.FFFF.PP.lBB.cCC // // where DD and FFFF are the device and file IDs (printed in hex) that // together identify the file, PP is the // mangled name of the function that encloses the target region, BB is the // line number of the target region, and CC is the column number of the target // region. unsigned DeviceID; unsigned FileID; unsigned Line; unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, Line, Column); SmallString<64> EntryFnName; { llvm::raw_svector_ostream OS(EntryFnName); OS << ".omp_offloading" << llvm::format(".%x", DeviceID) << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" << Column; } /* Outline the captured statement with a fresh CodeGenFunction, using EntryFnName as the helper name of the region info. */ CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); // If this target outline function is not an offload entry, we don't need to // register it. Note that OutlinedFnID is left unassigned on this path. if (!IsOffloadEntry) return; // The target region ID is used by the runtime library to identify the current // target region, so it only has to be unique and not necessarily point to // anything. It could be the pointer to the outlined function that implements // the target region, but we aren't using that so that the compiler doesn't // need to keep that, and could therefore inline the host function if proven // worthwhile during optimization. 
In the other hand, if emitting code for the // device, the ID has to be the function address so that it can be retrieved from // the offloading entry and launched by the runtime library. We also mark the // outlined function to have external linkage in case we are emitting code for // the device, because these functions will be entry points to the device. if (CGM.getLangOpts().OpenMPIsDevice) { OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); } else /* Host side: the ID is a private constant i8 global whose only purpose is to have a unique address. */ OutlinedFnID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); return; } /* Emits the code that launches a target region: it builds the argument arrays from CapturedVars, calls the offloading runtime when OutlinedFnID is set (subject to IfCond), and calls OutlinedFn on the host when offloading is not attempted or reports a non-zero result. */ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef CapturedVars) { if (!CGF.HaveInsertPoint()) return; /// \brief Values for bit flags used to specify the mapping type for /// offloading. enum OpenMPOffloadMappingFlags { /// \brief Allocate memory on the device and move data from host to device. OMP_MAP_TO = 0x01, /// \brief Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, /// \brief The element passed to the device is a pointer. OMP_MAP_PTR = 0x20, /// \brief Pass the element to the device by value. OMP_MAP_BYCOPY = 0x80, }; enum OpenMPOffloadingReservedDeviceIDs { /// \brief Device ID if the device was not defined, runtime should get it /// from environment variables in the spec. OMP_DEVICEID_UNDEF = -1, }; assert(OutlinedFn && "Invalid outlined function!"); auto &Ctx = CGF.getContext(); // Fill up the arrays with all the captured variables. 
SmallVector BasePointers; SmallVector Pointers; SmallVector Sizes; SmallVector MapTypes; bool hasVLACaptures = false; const CapturedStmt &CS = *cast(D.getAssociatedStmt()); auto RI = CS.getCapturedRecordDecl()->field_begin(); // auto II = CS.capture_init_begin(); auto CV = CapturedVars.begin(); /* Walk the captures, the fields of the captured record and the captured values in lockstep; each iteration produces one base pointer, pointer, size and map type. NOTE(review): Name and Ty are declared below but do not appear to be used in this loop. */ for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { StringRef Name; QualType Ty; llvm::Value *BasePointer; llvm::Value *Pointer; llvm::Value *Size; unsigned MapType; // VLA sizes are passed to the outlined region by copy. if (CI->capturesVariableArrayType()) { BasePointer = Pointer = *CV; Size = getTypeSize(CGF, RI->getType()); // Copy to the device as an argument. No need to retrieve it. MapType = OMP_MAP_BYCOPY; hasVLACaptures = true; } else if (CI->capturesThis()) { BasePointer = Pointer = *CV; const PointerType *PtrTy = cast(RI->getType().getTypePtr()); Size = getTypeSize(CGF, PtrTy->getPointeeType()); // Default map type. MapType = OMP_MAP_TO | OMP_MAP_FROM; } else if (CI->capturesVariableByCopy()) { MapType = OMP_MAP_BYCOPY; if (!RI->getType()->isAnyPointerType()) { // If the field is not a pointer, we need to save the actual value in a // uintptr-typed temporary and load it back as a uintptr value. auto DstAddr = CGF.CreateMemTemp( Ctx.getUIntPtrType(), Twine(CI->getCapturedVar()->getName()) + ".casted"); LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); auto *SrcAddrVal = CGF.EmitScalarConversion( DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), Ctx.getPointerType(RI->getType()), SourceLocation()); LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); // Store the value using the source type pointer. CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); // Load the value using the destination type pointer. 
BasePointer = Pointer = CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); } else { /* A pointer captured by copy is passed as is and flagged as a pointer. */ MapType |= OMP_MAP_PTR; BasePointer = Pointer = *CV; } Size = getTypeSize(CGF, RI->getType()); } else { assert(CI->capturesVariable() && "Expected captured reference."); BasePointer = Pointer = *CV; const ReferenceType *PtrTy = cast(RI->getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); Size = getTypeSize(CGF, ElementType); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate type, // the default is 'tofrom'. MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) : OMP_MAP_TO; if (ElementType->isAnyPointerType()) MapType |= OMP_MAP_PTR; } BasePointers.push_back(BasePointer); Pointers.push_back(Pointer); Sizes.push_back(Size); MapTypes.push_back(MapType); } // Keep track of whether the host function has to be executed. The flag is a // 32-bit temporary initialized to zero; a non-zero value later selects the // host version. auto OffloadErrorQType = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); auto OffloadError = CGF.MakeAddrLValue( CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), OffloadErrorQType); CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), OffloadError); // Fill up the pointer arrays and transfer execution to the device. 
/* NOTE(review): OffloadErrorQType is captured by this lambda but does not appear to be referenced in its body. */ auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OutlinedFnID, OffloadError, OffloadErrorQType](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; llvm::Value *PointersArray; llvm::Value *SizesArray; llvm::Value *MapTypesArray; if (PointerNumVal) { llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); QualType PointerArrayType = Ctx.getConstantArrayType( Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); /* The base-pointer and pointer arrays are always temporaries that are filled in by emitted stores. */ BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); // If we don't have any VLA types, we can use a constant array for the map // sizes, otherwise we need to fill up the arrays as we do for the // pointers. if (hasVLACaptures) { QualType SizeArrayType = Ctx.getConstantArrayType( Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { // We expect all the sizes to be constant, so we collect them to create // a constant array that is emitted as a private constant global. SmallVector ConstSizes; for (auto S : Sizes) ConstSizes.push_back(cast(S)); auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); auto *SizesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, SizesArrayInit, ".offload_sizes"); SizesArrayGbl->setUnnamedAddr(true); SizesArray = SizesArrayGbl; } // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. 
llvm::Constant *MapTypesArrayInit = llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); auto *MapTypesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), MapTypesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapTypesArrayInit, ".offload_maptypes"); MapTypesArrayGbl->setUnnamedAddr(true); MapTypesArray = MapTypesArrayGbl; /* Store every base pointer and pointer into its slot of the temporary arrays. Pointer values are bitcast to void*, and integer values are converted with inttoptr. When there are VLA captures, the sizes are stored too. */ for (unsigned i = 0; i < PointerNumVal; ++i) { llvm::Value *BPVal = BasePointers[i]; if (BPVal->getType()->isPointerTy()) BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); else { assert(BPVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 0, i); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); llvm::Value *PVal = Pointers[i]; if (PVal->getType()->isPointerTy()) PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); else { assert(PVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, i); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); /* Sizes only need stores when the sizes array is a temporary, i.e. when there are VLA captures. */ if (hasVLACaptures) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, /*Idx0=*/0, /*Idx1=*/i); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( Sizes[i], CGM.SizeTy, /*isSigned=*/true), SAddr); } } /* Replace each array value by a pointer to its first element (GEP 0, 0) so it can be passed to the runtime call. */ BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, /*Idx0=*/0, /*Idx1=*/0); PointersArray = 
CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, /*Idx0=*/0, /*Idx1=*/0); SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); } else { /* Nothing was captured: pass null pointers for all four arrays. */ BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); MapTypesArray = llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); } // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target // region, so it only has to be unique and not necessarily point to // anything. It could be the pointer to the outlined function that // implements the target region, but we aren't using that so that the // compiler doesn't need to keep that, and could therefore inline the host // function if proven worthwhile during optimization. // From this point on, we need to have an ID of the target region defined. assert(OutlinedFnID && "Invalid outlined function ID!"); // Emit the device ID, converted to a 32-bit integer, if a device expression // was given; otherwise pass OMP_DEVICEID_UNDEF. llvm::Value *DeviceID; if (Device) DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), CGM.Int32Ty, /*isSigned=*/true); else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); /* Argument order: device id, region id, number of entries, base pointers, pointers, sizes, map types. */ llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, BasePointersArray, PointersArray, SizesArray, MapTypesArray}; /* The value returned by the runtime call is stored in OffloadError and checked after the lambda has run. */ auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), OffloadingArgs); CGF.EmitStoreOfScalar(Return, OffloadError); }; // Notify that the host version must be executed. 
auto &&ElseGen = [this, OffloadError, OffloadErrorQType](CodeGenFunction &CGF) { CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), OffloadError); }; // If we have a target function ID it means that we need to support // offloading, otherwise, just execute on the host. We need to execute on host // regardless of the conditional in the if clause if, e.g., the user does not // specify target triples. if (OutlinedFnID) { if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); } else { CodeGenFunction::RunCleanupsScope Scope(CGF); ThenGen(CGF); } } else { CodeGenFunction::RunCleanupsScope Scope(CGF); ElseGen(CGF); } // Check the error code and execute the host version if required. A non-zero // value branches to the block that calls OutlinedFn with the base pointers. auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); CGF.Builder.CreateCall(OutlinedFn, BasePointers); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); return; } /* Recursively walks statement S looking for target directives. For each one that has registered entry info, the outlined function is emitted with ParentName as the enclosing function's name. A null S is ignored. */ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName) { if (!S) return; // If we find an OMP target directive, codegen the outline function and // register the result. // FIXME: Add other directives with target when they become supported. bool isTargetDirective = isa(S); if (isTargetDirective) { auto *E = cast(S); unsigned DeviceID; unsigned FileID; unsigned Line; unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, FileID, Line, Column); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. 
if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, Column)) return; llvm::Function *Fn; llvm::Constant *Addr; emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, /*IsOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; } /* For any other executable directive, continue the scan inside the statement captured by its associated statement, if it has one. */ if (const OMPExecutableDirective *E = dyn_cast(S)) { if (!E->getAssociatedStmt()) return; scanForTargetRegionsFunctions( cast(E->getAssociatedStmt())->getCapturedStmt(), ParentName); return; } // If this is a lambda function, look into its body. if (auto *L = dyn_cast(S)) S = L->getBody(); // Keep looking for target regions recursively. for (auto *II : S->children()) scanForTargetRegionsFunctions(II, ParentName); return; } /* When compiling for the device, scans the body of the function GD for target regions and returns true to signal that GD needs no further emission. Returns false when compiling for the host. */ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { auto &FD = *cast(GD.getDecl()); // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. if (!CGM.getLangOpts().OpenMPIsDevice) return false; // Try to detect target regions in the function. scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); // We should not emit any function other than the ones created during the // scanning. Therefore, we signal that this function is completely dealt // with. return true; } /* When compiling for the device, scans the constructors and the destructor of the variable's class type for target regions and returns true. Returns false when compiling for the host. */ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice) return false; // Check if there are Ctors/Dtors in this declaration and look for target // regions in it. We use the complete variant to produce the kernel name // mangling. 
QualType RDTy = cast(GD.getDecl())->getType(); if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { for (auto *Ctor : RD->ctors()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); } auto *Dtor = RD->getDestructor(); if (Dtor) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); } } // If we are in target mode we do not emit any global (declare target is not // implemented yet). Therefore we signal that GD was processed in this case. return true; } /* Entry point for a global declaration: forwards to emitTargetFunctions when the isa<> check below succeeds (presumably for function declarations; the type argument is missing from this text) and to emitTargetGlobalVariable otherwise. */ bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { auto *VD = GD.getDecl(); if (isa(VD)) return emitTargetFunctions(GD); return emitTargetGlobalVariable(GD); } /* Emits the offload entries and their metadata, then returns the function produced by createOffloadingBinaryDescriptorRegistration(). */ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. createOffloadEntriesAndInfoMetadata(); // Create and register the offloading binary descriptors. This is the main // entity that captures all the information about offloading in the current // compilation unit. return createOffloadingBinaryDescriptorRegistration(); } Index: vendor/clang/dist/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- vendor/clang/dist/lib/CodeGen/CGStmtOpenMP.cpp (revision 294603) +++ vendor/clang/dist/lib/CodeGen/CGStmtOpenMP.cpp (revision 294604) @@ -1,2679 +1,2679 @@ //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This contains code to emit OpenMP nodes as LLVM code. 
// //===----------------------------------------------------------------------===// #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" using namespace clang; using namespace CodeGen; /* Appends one value to CapturedVars for every capture of S, in capture order: the recorded VLA size for a captured VLA type, CXXThisValue for 'this', the loaded scalar for a by-copy capture, and the variable's address for a by-reference capture. */ void CodeGenFunction::GenerateOpenMPCapturedVars( const CapturedStmt &S, SmallVectorImpl &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); auto CurField = RD->field_begin(); auto CurCap = S.captures().begin(); for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), E = S.capture_init_end(); I != E; ++I, ++CurField, ++CurCap) { if (CurField->hasCapturedVLAType()) { auto VAT = CurField->getCapturedVLAType(); auto *Val = VLASizeMap[VAT->getSizeExpr()]; CapturedVars.push_back(Val); } else if (CurCap->capturesThis()) CapturedVars.push_back(CXXThisValue); else if (CurCap->capturesVariableByCopy()) CapturedVars.push_back( EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); else { assert(CurCap->capturesVariable() && "Expected capture by reference."); CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); } } } /* Converts the address held by AddrLV, which addresses a uintptr-typed slot, into an address of type pointer-to-DstType and returns it. When isReferenceType is true, that pointer is instead stored into a new temporary of reference type named Name + ".ref", and the temporary's address is returned. */ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, StringRef Name, LValue AddrLV, bool isReferenceType = false) { ASTContext &Ctx = CGF.getContext(); auto *CastedPtr = CGF.EmitScalarConversion( AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), SourceLocation()); auto TmpAddr = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) .getAddress(); // If we are dealing with references we need to return the address of the // reference instead of the reference of the value. 
if (isReferenceType) { QualType RefType = Ctx.getLValueReferenceType(DstType); auto *RefVal = TmpAddr.getPointer(); TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); CGF.EmitScalarInit(RefVal, TmpLVal); } return TmpAddr; } /* Builds and returns the outlined function for captured statement S. The context parameter of the captured declaration is replaced by one parameter per field of the captured record, each capture is bound to its parameter, and the body is emitted through CapturedStmtInfo, which must be set. */ llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); const CapturedDecl *CD = S.getCapturedDecl(); const RecordDecl *RD = S.getCapturedRecordDecl(); assert(CD->hasBody() && "missing CapturedDecl body"); // Build the argument list: first the parameters that precede the context // parameter, then one parameter per captured field. ASTContext &Ctx = CGM.getContext(); FunctionArgList Args; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); auto I = S.captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; VarDecl *CapVar = nullptr; // If this is a capture by copy and the type is not a pointer, the outlined // function argument type should be uintptr and the value properly cast to // uintptr. This is necessary given that the runtime library is only able to // deal with pointers. VLA type sizes are passed to the outlined function in // the same way. 
if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || I->capturesVariableArrayType()) ArgType = Ctx.getUIntPtrType(); /* Choose the parameter name: the captured variable's identifier, "this" for a captured 'this', or "vla" for a captured VLA size. */ if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); } else if (I->capturesThis()) II = &getContext().Idents.get("this"); else { assert(I->capturesVariableArrayType()); II = &getContext().Idents.get("vla"); } if (ArgType->isVariablyModifiedType()) ArgType = getContext().getVariableArrayDecayedType(ArgType); Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, FD->getLocation(), II, ArgType)); ++I; } /* Append the original parameters that follow the context parameter. */ Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); // Create the function declaration: a void function with internal linkage, // named by the region info's helper name. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, /*IsVariadic=*/false); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = llvm::Function::Create( FuncLLVMTy, llvm::GlobalValue::InternalLinkage, CapturedStmtInfo->getHelperName(), &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getLocStart()); /* Cnt indexes Args; it starts at the position the context parameter used to occupy, which is where the per-field parameters begin. */ unsigned Cnt = CD->getContextParamPosition(); I = S.captures().begin(); for (auto *FD : RD->fields()) { // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. 
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); ++Cnt, ++I; continue; } LValue ArgLVal = MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), AlignmentSource::Decl); /* Captured VLA size: read the value that was passed as uintptr and record it in VLASizeMap under the size expression. */ if (FD->hasCapturedVLAType()) { LValue CastedArgLVal = MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), Args[Cnt]->getName(), ArgLVal), FD->getType(), AlignmentSource::Decl); auto *ExprArg = EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); VLASizeMap[VAT->getSizeExpr()] = ExprArg; } else if (I->capturesVariable()) { /* Captured by reference: when the variable itself is not a reference, load the referenced address from the argument first; the variable is then mapped to that address with its declared alignment. */ auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { ArgAddr = EmitLoadOfReference( ArgAddr, ArgLVal.getType()->castAs()); } setAddrOfLocalVar( Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); } else if (I->capturesVariableByCopy()) { /* Non-pointer captured by copy: the argument is a uintptr slot that is reinterpreted as the field's type. */ assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); setAddrOfLocalVar(I->getCapturedVar(), castValueFromUintptr(*this, FD->getType(), Args[Cnt]->getName(), ArgLVal, VarTy->isReferenceType())); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); CXXThisValue = EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); } ++Cnt, ++I; } PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); return F; } //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// /* Emits a loop over the elements of the array at DestAddr and the corresponding elements at SrcAddr, calling CopyGen on each destination/source element pair. OriginalType is the array type. */ void CodeGenFunction::EmitOMPAggregateAssign( Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref &CopyGen) { // Perform element-by-element initialization. 
QualType ElementTy; // Drill down to the base element type on both arrays. The source address is // cast to the destination's element type. auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); auto SrcBegin = SrcAddr.getPointer(); auto DestBegin = DestAddr.getPointer(); // DestEnd is the destination pointer advanced by NumElements, i.e. one past // the last element. auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); // The loop is skipped entirely when the array is empty; otherwise the body // runs first and the exit test follows it. auto BodyBB = createBasicBlock("omp.arraycpy.body"); auto DoneBB = createBasicBlock("omp.arraycpy.done"); auto IsEmpty = Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. The PHI // nodes receive the begin pointers from the entry block. auto EntryBB = Builder.GetInsertBlock(); EmitBlock(BodyBB); CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); llvm::PHINode *SrcElementPHI = Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); SrcElementPHI->addIncoming(SrcBegin, EntryBB); Address SrcElementCurrent = Address(SrcElementPHI, SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); llvm::PHINode *DestElementPHI = Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); Address DestElementCurrent = Address(DestElementPHI, DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CopyGen(DestElementCurrent, SrcElementCurrent); // Shift both addresses forward by one element. auto DestElementNext = Builder.CreateConstGEP1_32( DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); auto SrcElementNext = Builder.CreateConstGEP1_32( SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. 
auto Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); Builder.CreateCondBr(Done, DoneBB, BodyBB); DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); // Done. EmitBlock(DoneBB, /*IsFinished=*/true); } /// \brief Emit initialization of arrays of complex types. /// \param DestAddr Address of the array. /// \param Type Type of array. /// \param Init Initializer expression that is evaluated into each element. static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, const Expr *Init) { // Perform element-by-element initialization. QualType ElementTy; // Drill down to the base element type of the array. auto ArrayTy = Type->getAsArrayTypeUnsafe(); auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); /* NOTE(review): the cast below converts DestAddr to its own element type. */ DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); auto DestBegin = DestAddr.getPointer(); // DestEnd is the destination pointer advanced by NumElements, i.e. one past // the last element. auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); // The loop is skipped entirely when the array is empty; otherwise the body // runs first and the exit test follows it. auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); auto IsEmpty = CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. auto EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); Address DestElementCurrent = Address(DestElementPHI, DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit the initialization of the current element. 
{ CodeGenFunction::RunCleanupsScope InitScope(CGF); CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), /*IsInitializer=*/false); } // Shift the address forward by one element. auto DestElementNext = CGF.Builder.CreateConstGEP1_32( DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); // Check whether we've reached the end. auto Done = CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); // Done. CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } /* Emits the copy described by the expression Copy from SrcAddr to DestAddr. For array types a plain assignment becomes an aggregate assign and any other copy expression is applied element by element. For non-array types the pseudo variables SrcVD and DestVD are remapped to the two addresses and Copy is evaluated once. */ void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { if (OriginalType->isArrayType()) { auto *BO = dyn_cast(Copy); if (BO && BO->getOpcode() == BO_Assign) { // Perform simple memcpy for simple copying. EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); } else { // For arrays with complex element types perform element by element // copying. EmitOMPAggregateAssign( DestAddr, SrcAddr, OriginalType, [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { // Working with the single array element, so have to remap // destination and source variables to corresponding array // elements. CodeGenFunction::OMPPrivateScope Remap(*this); Remap.addPrivate(DestVD, [DestElement]() -> Address { return DestElement; }); Remap.addPrivate( SrcVD, [SrcElement]() -> Address { return SrcElement; }); (void)Remap.Privatize(); EmitIgnoredExpr(Copy); }); } } else { // Remap the pseudo source and destination variables to SrcAddr and // DestAddr. CodeGenFunction::OMPPrivateScope Remap(*this); Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); (void)Remap.Privatize(); // Emit copying of the whole variable. 
EmitIgnoredExpr(Copy); } } /* For every variable listed in the clauses iterated below, registers a private copy in PrivateScope that is initialized from the original variable. Each variable is handled at most once. Returns false when there is no insert point; otherwise returns whether at least one variable was emitted. */ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; llvm::DenseSet EmittedAsFirstprivate; for (const auto *C : D.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); /* NOTE(review): the set is keyed on OrigVD as is, whereas EmitOMPPrivateClause and EmitOMPCopyinClause key their sets on getCanonicalDecl(); confirm whether a redeclaration can reach this point. */ if (EmittedAsFirstprivate.count(OrigVD) == 0) { EmittedAsFirstprivate.insert(OrigVD); auto *VD = cast(cast(IInit)->getDecl()); auto *VDInit = cast(cast(*InitsRef)->getDecl()); bool IsRegistered; /* Build a reference to the original variable; it is marked as referring to an enclosing capture when the captured-statement info knows the variable. */ DeclRefExpr DRE( const_cast(OrigVD), /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); Address OriginalAddr = EmitLValue(&DRE).getAddress(); QualType Type = OrigVD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { auto Emission = EmitAutoVarAlloca(*VD); auto *Init = VD->getInit(); if (!isa(Init) || isTrivialInitializer(Init)) { // Perform simple memcpy. EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, Type); } else { EmitOMPAggregateAssign( Emission.getAllocatedAddress(), OriginalAddr, Type, [this, VDInit, Init](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. RunCleanupsScope InitScope(*this); // Emit initialization for single element. VDInit is mapped to // the source element only while the initializer is emitted. setAddrOfLocalVar(VDInit, SrcElement); EmitAnyExprToMem(Init, DestElement, Init->getType().getQualifiers(), /*IsInitializer*/ false); LocalDeclMap.erase(VDInit); }); } EmitAutoVarCleanups(Emission); return Emission.getAllocatedAddress(); }); } else { IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. 
// This physical line holds the end of EmitOMPFirstprivateClause, all of CodeGenFunction::EmitOMPPrivateClause, and the start of CodeGenFunction::EmitOMPCopyinClause.
// End of EmitOMPFirstprivateClause: for non-array variables VDInit is mapped to OriginalAddr, the private VD is emitted with EmitDecl, the VDInit mapping is erased, and the private address is returned. The function's result is true iff at least one variable was recorded in EmittedAsFirstprivate.
// EmitOMPPrivateClause(D, PrivateScope): does nothing without an insert point. For each clause variable whose canonical declaration is seen for the first time, it emits the private VarDecl with EmitDecl and registers its address for OrigVD; registration is asserted to succeed.
// EmitOMPCopyinClause(D): returns false without an insert point. It walks each clause's variables in lock-step with the source expressions, destination expressions and assignment ops, and handles each canonical declaration once (the body continues on the following lines).
// Remap temp VDInit variable to the address of the original // variable // (for proper handling of captured global variables). setAddrOfLocalVar(VDInit, OriginalAddr); EmitDecl(*VD); LocalDeclMap.erase(VDInit); return GetAddrOfLocalVar(VD); }); } assert(IsRegistered && "firstprivate var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; } ++IRef, ++InitsRef; } } return !EmittedAsFirstprivate.empty(); } void CodeGenFunction::EmitOMPPrivateClause( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; llvm::DenseSet EmittedAsPrivate; for (const auto *C : D.getClausesOfKind()) { auto IRef = C->varlist_begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { auto VD = cast(cast(IInit)->getDecl()); bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; } ++IRef; } } } bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { if (!HaveInsertPoint()) return false; // threadprivate_var1 = master_threadprivate_var1; // operator=(threadprivate_var2, master_threadprivate_var2); // ... // __kmpc_barrier(&loc, global_tid); llvm::DenseSet CopiedVars; llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; for (const auto *C : D.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *AssignOp : C->assignment_ops()) { auto *VD = cast(cast(*IRef)->getDecl()); QualType Type = VD->getType(); if (CopiedVars.insert(VD->getCanonicalDecl()).second) { // Get the address of the master variable. 
// This physical line is the middle of CodeGenFunction::EmitOMPCopyinClause (per-variable work inside the clause loop).
// MasterAddr: when OpenMPUseTLS is set and the target supports TLS, the variable must already be captured (asserted); its address is obtained by emitting a DeclRefExpr, after which VD is erased from LocalDeclMap. Otherwise the address is the static-local or global address of VD with the declaration's alignment.
// PrivateAddr is the lvalue address of the clause variable expression itself.
// For the first copied variable only (CopiedVars.size() == 1), the blocks "copyin.not.master" / "copyin.not.master.end" are created and a branch is emitted that enters the copy block when MasterAddr and PrivateAddr differ as integers.
// Each variable is then copied from MasterAddr to PrivateAddr via EmitOMPCopy using the clause's assignment op; all parallel iterators are advanced every iteration, whether or not the variable was copied.
If we are emitting code with // TLS support, the address is passed from the master as field in the // captured declaration. Address MasterAddr = Address::invalid(); if (getLangOpts().OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported()) { assert(CapturedStmtInfo->lookup(VD) && "Copyin threadprivates should have been captured!"); DeclRefExpr DRE(const_cast(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); MasterAddr = EmitLValue(&DRE).getAddress(); LocalDeclMap.erase(VD); } else { MasterAddr = Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) : CGM.GetAddrOfGlobal(VD), getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. Address PrivateAddr = EmitLValue(*IRef).getAddress(); if (CopiedVars.size() == 1) { // At first check if current thread is a master thread. If it is, no // need to copy data. CopyBegin = createBasicBlock("copyin.not.master"); CopyEnd = createBasicBlock("copyin.not.master.end"); Builder.CreateCondBr( Builder.CreateICmpNE( Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), CopyBegin, CopyEnd); EmitBlock(CopyBegin); } auto *SrcVD = cast(cast(*ISrcRef)->getDecl()); auto *DestVD = cast(cast(*IDestRef)->getDecl()); EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); } ++IRef; ++ISrcRef; ++IDestRef; } } if (CopyEnd) { // Exit out of copying procedure for non-master thread. 
// This physical line holds the end of EmitOMPCopyinClause, all of CodeGenFunction::EmitOMPLastprivateClauseInit, and the start of CodeGenFunction::EmitOMPLastprivateClauseFinal.
// End of EmitOMPCopyinClause: if a copy region was opened, its end block is emitted and true is returned; otherwise false.
// EmitOMPLastprivateClauseInit(D, PrivateScope): returns false without an insert point; otherwise returns true iff the directive has at least one matching clause (the flag is set per clause, before any variable is examined). For each variable whose canonical declaration is first seen, DestVD is registered to the address of the original variable (through a DeclRefExpr, so captures are honoured). When IInit is non-null a private copy is emitted with EmitDecl and registered for OrigVD; a null IInit means the variable is also firstprivate and is initialised by that clause's codegen.
// EmitOMPLastprivateClauseFinal(D, IsLastIterCond): does nothing without an insert point; the body follows on the next lines.
EmitBlock(CopyEnd, /*IsFinished=*/true); return true; } return false; } bool CodeGenFunction::EmitOMPLastprivateClauseInit( const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; bool HasAtLeastOneLastprivate = false; llvm::DenseSet AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind()) { HasAtLeastOneLastprivate = true; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. auto *OrigVD = cast(cast(*IRef)->getDecl()); if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { auto *DestVD = cast(cast(*IDestRef)->getDecl()); PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { DeclRefExpr DRE( const_cast(OrigVD), /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); return EmitLValue(&DRE).getAddress(); }); // Check if the variable is also a firstprivate: in this case IInit is // not generated. Initialization of this variable will happen in codegen // for 'firstprivate' clause. if (IInit) { auto *VD = cast(cast(IInit)->getDecl()); bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; } } ++IRef, ++IDestRef; } } return HasAtLeastOneLastprivate; } void CodeGenFunction::EmitOMPLastprivateClauseFinal( const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { if (!HaveInsertPoint()) return; // Emit following code: // if () { // orig_var1 = private_orig_var1; // ... 
// This physical line is the middle of CodeGenFunction::EmitOMPLastprivateClauseFinal.
// When IsLastIterCond is non-null, the copy-back is guarded: blocks ".omp.lastprivate.then" / ".omp.lastprivate.done" are created and a conditional branch on IsLastIterCond is emitted.
// If D is a loop directive and its kind is a worksharing directive, the code records LastIterVal (the initializer of the upper-bound variable), the iteration variable, the increment expression, and a map from each canonical loop counter to its update expression.
// For every clause variable handled for the first time: if it is a loop counter, then on the first such counter (and only when LastIterVal is set) LastIterVal is stored into the iteration variable and IncExpr is emitted; the counter's update expression is emitted in every case, so the counter holds its final value before it is copied back.
// orig_varn = private_orig_varn; // } llvm::BasicBlock *ThenBB = nullptr; llvm::BasicBlock *DoneBB = nullptr; if (IsLastIterCond) { ThenBB = createBasicBlock(".omp.lastprivate.then"); DoneBB = createBasicBlock(".omp.lastprivate.done"); Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); EmitBlock(ThenBB); } llvm::DenseMap LoopCountersAndUpdates; const Expr *LastIterVal = nullptr; const Expr *IVExpr = nullptr; const Expr *IncExpr = nullptr; if (auto *LoopDirective = dyn_cast(&D)) { if (isOpenMPWorksharingDirective(D.getDirectiveKind())) { LastIterVal = cast(cast( LoopDirective->getUpperBoundVariable()) ->getDecl()) ->getAnyInitializer(); IVExpr = LoopDirective->getIterationVariable(); IncExpr = LoopDirective->getInc(); auto IUpdate = LoopDirective->updates().begin(); for (auto *E : LoopDirective->counters()) { auto *D = cast(E)->getDecl()->getCanonicalDecl(); LoopCountersAndUpdates[D] = *IUpdate; ++IUpdate; } } } { llvm::DenseSet AlreadyEmittedVars; bool FirstLCV = true; for (const auto *C : D.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *AssignOp : C->assignment_ops()) { auto *PrivateVD = cast(cast(*IRef)->getDecl()); QualType Type = PrivateVD->getType(); auto *CanonicalVD = PrivateVD->getCanonicalDecl(); if (AlreadyEmittedVars.insert(CanonicalVD).second) { // If lastprivate variable is a loop control variable for loop-based // directive, update its value before copyin back to original // variable. if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) { if (FirstLCV && LastIterVal) { EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(), IVExpr->getType().getQualifiers(), /*IsInitializer=*/false); EmitIgnoredExpr(IncExpr); FirstLCV = false; } EmitIgnoredExpr(UpExpr); } auto *SrcVD = cast(cast(*ISrcRef)->getDecl()); auto *DestVD = cast(cast(*IDestRef)->getDecl()); // Get the address of the original variable. 
// This physical line holds the end of EmitOMPLastprivateClauseFinal and the start of CodeGenFunction::EmitOMPReductionClauseInit.
// End of EmitOMPLastprivateClauseFinal: the original address is looked up through DestVD and the private address through PrivateVD; if the `getAs` query on the private variable's type yields a type, the private address is loaded through and given the natural alignment of the pointee. EmitOMPCopy then copies private -> original, and the done block is emitted when the copy-back was conditional.
// EmitOMPReductionClauseInit(D, PrivateScope): does nothing without an insert point. It iterates the clause variables in lock-step with the LHS, RHS and private helper expressions. This line starts the first of three cases (the item matches the first dyn_cast and is named OASE): the base expression is peeled through nested OASE-like and subscript-like nodes (after IgnoreParenImpCasts) down to a DeclRefExpr; lower- and upper-bound lvalues are emitted with EmitOMPArraySectionExpr; and a GEP of zero indices is built over the base until the pointee type equals the item type.
// NOTE(review): the concrete cast/dyn_cast target types are not visible in this text - confirm against upstream.
Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); if (auto RefTy = PrivateVD->getType()->getAs()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), getNaturalTypeAlignment(RefTy->getPointeeType())); EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); } ++IRef; ++ISrcRef; ++IDestRef; } } } if (IsLastIterCond) { EmitBlock(DoneBB, /*IsFinished=*/true); } } void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; for (const auto *C : D.getClausesOfKind()) { auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); for (auto IRef : C->varlists()) { auto *LHSVD = cast(cast(*ILHS)->getDecl()); auto *RHSVD = cast(cast(*IRHS)->getDecl()); auto *PrivateVD = cast(cast(*IPriv)->getDecl()); if (auto *OASE = dyn_cast(IRef)) { auto *Base = OASE->getBase()->IgnoreParenImpCasts(); while (auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); auto *DE = cast(Base); auto *OrigVD = cast(DE->getDecl()); auto OASELValueLB = EmitOMPArraySectionExpr(OASE); auto OASELValueUB = EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); auto OriginalBaseLValue = EmitLValue(DE); auto BaseLValue = OriginalBaseLValue; auto *Zero = Builder.getInt64(/*C=*/0); llvm::SmallVector Indexes; Indexes.push_back(Zero); auto *ItemTy = OASELValueLB.getPointer()->getType()->getPointerElementType(); auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); while (Ty != ItemTy) { Indexes.push_back(Zero); Ty = Ty->getPointerElementType(); } BaseLValue = MakeAddrLValue( Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), OASELValueLB.getAlignment()), OASELValueLB.getType(), 
// This physical line continues the first (OASE) case of CodeGenFunction::EmitOMPReductionClauseInit.
// LHSVD is registered to the address of the lower bound lvalue (OASELValueLB).
// OrigVD is registered through a lambda that: computes Size as the pointer difference (upper bound - lower bound) plus one; binds that value to the size expression of PrivateVD's variable-array type through an OpaqueValueMapping; emits the variably-modified type; allocates PrivateVD with EmitAutoVarAlloca; initialises it with EmitOMPAggregateInit; and registers cleanups.
// The address returned for OrigVD is the private storage displaced by (base pointer - lower-bound pointer) elements and cast to the original base pointer type, so that indexing written against the original base lands inside the private copy.
// Registration of OrigVD is asserted to succeed.
OASELValueLB.getAlignmentSource()); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { return OASELValueLB.getAddress(); }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB, OriginalBaseLValue]() -> Address { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), OASELValueLB.getPointer()); Size = Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); CodeGenFunction::OpaqueValueMapping OpaqueMap( *this, cast( getContext() .getAsVariableArrayType(PrivateVD->getType()) ->getSizeExpr()), RValue::get(Size)); EmitVariablyModifiedType(PrivateVD->getType()); auto Emission = EmitAutoVarAlloca(*PrivateVD); auto Addr = Emission.getAllocatedAddress(); auto *Init = PrivateVD->getInit(); EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); EmitAutoVarCleanups(Emission); // Emit private VarDecl with reduction init. auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), OASELValueLB.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( Ptr, OriginalBaseLValue.getPointer()->getType()); return Address(Ptr, OriginalBaseLValue.getAlignment()); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. 
// This physical line finishes the first (OASE) case and holds the second (ASE) case of CodeGenFunction::EmitOMPReductionClauseInit.
// End of the first case: RHSVD is registered to the address of the private variable.
// Second case (the item matches the second dyn_cast and is named ASE): the base is peeled through nested nodes of the same kind to a DeclRefExpr; the element lvalue (ASELValue) and the base lvalue are emitted; a zero-index GEP is built over the base until its pointee type equals the element type.
// LHSVD is registered to the element's address. OrigVD is registered through a lambda that emits PrivateVD with EmitDecl and returns its address displaced by (base pointer - element pointer) and cast to the original base pointer type. Registration is asserted to succeed.
(void)IsRegistered; PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { return GetAddrOfLocalVar(PrivateVD); }); } else if (auto *ASE = dyn_cast(IRef)) { auto *Base = ASE->getBase()->IgnoreParenImpCasts(); while (auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); auto *DE = cast(Base); auto *OrigVD = cast(DE->getDecl()); auto ASELValue = EmitLValue(ASE); auto OriginalBaseLValue = EmitLValue(DE); auto BaseLValue = OriginalBaseLValue; auto *Zero = Builder.getInt64(/*C=*/0); llvm::SmallVector Indexes; Indexes.push_back(Zero); auto *ItemTy = ASELValue.getPointer()->getType()->getPointerElementType(); auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); while (Ty != ItemTy) { Indexes.push_back(Zero); Ty = Ty->getPointerElementType(); } BaseLValue = MakeAddrLValue( Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), ASELValue.getAlignment()), ASELValue.getType(), ASELValue.getAlignmentSource()); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { return ASELValue.getAddress(); }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( OrigVD, [this, PrivateVD, BaseLValue, ASELValue, OriginalBaseLValue]() -> Address { // Emit private VarDecl with reduction init. EmitDecl(*PrivateVD); auto Addr = GetAddrOfLocalVar(PrivateVD); auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), ASELValue.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( Ptr, OriginalBaseLValue.getPointer()->getType()); return Address(Ptr, OriginalBaseLValue.getAlignment()); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. 
// This physical line holds the end of EmitOMPReductionClauseInit and the start of CodeGenFunction::EmitOMPReductionClauseFinal.
// End of the second case: RHSVD is registered to the private variable's address.
// Third case (the item is a plain variable reference): LHSVD is registered to the address of the original variable, obtained through a DeclRefExpr that is marked as referring to a capture when CapturedStmtInfo knows the variable; OrigVD is registered to a private copy emitted with EmitDecl; RHSVD is registered to that same private address. The three helper iterators advance together at the end of each item.
// EmitOMPReductionClauseFinal(D): does nothing without an insert point. It concatenates the privates, LHS expressions, RHS expressions and reduction ops of all matching clauses and, if there was at least one clause, hands them to the OpenMP runtime's emitReduction (the call itself is on the next line).
(void)IsRegistered; PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { return GetAddrOfLocalVar(PrivateVD); }); } else { auto *OrigVD = cast(cast(IRef)->getDecl()); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { DeclRefExpr DRE(const_cast(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, IRef->getType(), VK_LValue, IRef->getExprLoc()); return EmitLValue(&DRE).getAddress(); }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { // Emit private VarDecl with reduction init. EmitDecl(*PrivateVD); return GetAddrOfLocalVar(PrivateVD); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { return GetAddrOfLocalVar(PrivateVD); }); } ++ILHS, ++IRHS, ++IPriv; } } } void CodeGenFunction::EmitOMPReductionClauseFinal( const OMPExecutableDirective &D) { if (!HaveInsertPoint()) return; llvm::SmallVector Privates; llvm::SmallVector LHSExprs; llvm::SmallVector RHSExprs; llvm::SmallVector ReductionOps; bool HasAtLeastOneReduction = false; for (const auto *C : D.getClausesOfKind()) { HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); } if (HasAtLeastOneReduction) { // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). 
// This physical line holds the end of EmitOMPReductionClauseFinal, all of the static helper emitCommonOMPParallelDirective, and the start of CodeGenFunction::EmitOMPParallelDirective.
// emitReduction call: the second-to-last argument is true when D.getSingleClause() finds a clause, or the directive is a parallel directive, or it is OMPD_simd; the last argument is true only for OMPD_simd.
// emitCommonOMPParallelDirective(CGF, S, InnermostKind, CodeGen): collects the captured variables of the associated captured statement, asks the runtime for the outlined parallel function, emits the num_threads value (if a NumThreadsClause is found) and the proc_bind kind (if a ProcBindClause is found), each inside its own RunCleanupsScope, selects the first matching clause whose name modifier is OMPD_unknown or OMPD_parallel as the `if` condition, and finally emits the parallel call.
// NOTE(review): the cleanup scope in the proc_bind branch is also called NumThreadsScope - apparently a copy-paste name; harmless because the two scopes live in separate blocks.
// EmitOMPParallelDirective(S): opens a LexicalScope over the directive's source range; the region body lambda follows on the next line.
CGM.getOpenMPRuntime().emitReduction( *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, D.getSingleClause() || isOpenMPParallelDirective(D.getDirectiveKind()) || D.getDirectiveKind() == OMPD_simd, D.getDirectiveKind() == OMPD_simd); } } static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast(S.getAssociatedStmt()); llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign*/ true); CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( CGF, NumThreads, NumThreadsClause->getLocStart()); } if (const auto *ProcBindClause = S.getSingleClause()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { if (C->getNameModifier() == OMPD_unknown || C->getNameModifier() == OMPD_parallel) { IfCond = C->getCondition(); break; } } CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); // Emit parallel region as a standalone region. 
// This physical line holds the body of CodeGenFunction::EmitOMPParallelDirective and nearly all of CodeGenFunction::EmitOMPLoopBody (its closing brace is on the next line).
// EmitOMPParallelDirective region lambda, in order: copyin, firstprivate, a forced simple barrier (no checks) when either of those emitted anything, private, reduction init, Privatize, the captured statement body, reduction final. The lambda is passed to emitCommonOMPParallelDirective with OMPD_parallel.
// EmitOMPLoopBody(D, LoopExit): inside a RunCleanupsScope it emits the counter update expressions, then the update expressions of each matching clause, pushes a BreakContinue(LoopExit, "omp.body.continue") entry so `continue` in the body jumps to the end of the iteration, emits the body, emits the continue block, and pops the entry.
// NOTE(review): the trailing TODO speaks of a follow-up lastprivate patch, yet EmitOMPLastprivateClauseInit/Final are defined in this file - the TODO looks stale; confirm before removing.
auto &&CodeGen = [&S](CodeGenFunction &CGF) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); if (Copyins || Firstprivates) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables or propagation master's thread // values of threadprivate variables to local instances of that variables // of all other implicit threads. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit) { RunCleanupsScope BodyScope(*this); // Update counters values on current iteration. for (auto I : D.updates()) { EmitIgnoredExpr(I); } // Update the linear variables. for (const auto *C : D.getClausesOfKind()) { for (auto U : C->updates()) { EmitIgnoredExpr(U); } } // On a continue in the body, jump to the end. auto Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); // Emit loop body. EmitStmt(D.getBody()); // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); // TODO: Update lastprivates if the SeparateIter flag is true. // This will be implemented in a follow-up OMPLastprivateClause patch, but // result should be still correct without it, as we do not make these // variables private yet. 
// This physical line holds the closing brace of EmitOMPLoopBody, all of CodeGenFunction::EmitOMPInnerLoop, and the start of CodeGenFunction::EmitOMPLinearClauseInit.
// EmitOMPInnerLoop(S, RequiresCleanup, LoopCond, IncExpr, BodyGen, PostIncGen) emits this block structure:
//   omp.inner.for.cond   - evaluates LoopCond and branches to the body or the exit (weighted with the profile count of S);
//   omp.inner.for.cond.cleanup - created only when RequiresCleanup is set; branches through cleanups to the exit;
//   omp.inner.for.body   - increments the profile counter and runs BodyGen;
//   omp.inner.for.inc    - emits IncExpr, then PostIncGen, then the back edge to the condition block;
//   omp.inner.for.end    - fall-through block.
// A BreakContinue(LoopExit, Continue) entry is on the stack from just before BodyGen until after PostIncGen, and the condition block is pushed on LoopStack for the duration of the loop.
// EmitOMPLinearClauseInit(D): does nothing without an insert point; its loop follows on the next line.
} void CodeGenFunction::EmitOMPInnerLoop( const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref &BodyGen, const llvm::function_ref &PostIncGen) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); LoopStack.push(CondBlock); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. auto ExitBlock = LoopExit.getBlock(); if (RequiresCleanup) ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); auto LoopBody = createBasicBlock("omp.inner.for.body"); // Emit condition. EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } EmitBlock(LoopBody); incrementProfileCounter(&S); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); BodyGen(*this); // Emit "IV = IV + 1" and a back-edge to the condition block. EmitBlock(Continue.getBlock()); EmitIgnoredExpr(IncExpr); PostIncGen(*this); BreakContinueStack.pop_back(); EmitBranch(CondBlock); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); } void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { if (!HaveInsertPoint()) return; // Emit inits for the linear variables. 
// This physical line holds the body of CodeGenFunction::EmitOMPLinearClauseInit and the start of the static helper emitLinearClauseFinal.
// EmitOMPLinearClauseInit: for every init expression of every matching clause, the helper variable VD is allocated with EmitAutoVarAlloca and initialised from a freshly built DeclRefExpr to the original variable (the declaration referenced by VD's own initializer, marked as a capture when CapturedStmtInfo knows it), then its cleanups are registered.
// If the clause has a step calculation (getCalcStep() non-null), the variable on the left-hand side of that expression is emitted with EmitVarDecl and the calculation itself is emitted once, i.e. the step is pre-computed before the loop.
// emitLinearClauseFinal(CGF, D): does nothing without an insert point; its loop follows on the next line.
for (const auto *C : D.getClausesOfKind()) { for (auto Init : C->inits()) { auto *VD = cast(cast(Init)->getDecl()); auto *OrigVD = cast( cast(VD->getInit()->IgnoreImpCasts())->getDecl()); DeclRefExpr DRE(const_cast(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, VD->getInit()->getType(), VK_LValue, VD->getInit()->getExprLoc()); AutoVarEmission Emission = EmitAutoVarAlloca(*VD); EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), /*capturedByInit=*/false); EmitAutoVarCleanups(Emission); } // Emit the linear steps for the linear clauses. // If a step is not constant, it is pre-calculated before the loop. if (auto CS = cast_or_null(C->getCalcStep())) if (auto SaveRef = cast(CS->getLHS())) { EmitVarDecl(*cast(SaveRef->getDecl())); // Emit calculation of the linear step. EmitIgnoredExpr(CS); } } } static void emitLinearClauseFinal(CodeGenFunction &CGF, const OMPLoopDirective &D) { if (!CGF.HaveInsertPoint()) return; // Emit the final values of the linear variables. 
// This physical line holds the body of emitLinearClauseFinal, all of the static helper emitAlignedClause, and the start of the static helper emitPrivateLoopCounters.
// emitLinearClauseFinal: for each `finals()` expression of each matching clause, the original variable's address is computed through a DeclRefExpr, the variable is temporarily remapped to that address in a local OMPPrivateScope, and the final expression is emitted - so the final value is written to the original variable rather than to a private copy.
// emitAlignedClause(CGF, D): does nothing without an insert point. Per clause, an explicit alignment expression is emitted and must produce a constant integer (enforced by `cast`); with no explicit alignment the value is 0. For each listed variable a zero alignment is replaced by the context's default SIMD alignment for the pointee type, converted from bits to char units. A non-zero alignment must be a power of two (asserted) and is attached to the pointer value with EmitAlignmentAssumption.
// emitPrivateLoopCounters(CGF, LoopScope, Counters, PrivateCounters): does nothing without an insert point. For each counter/private-counter pair it registers PrivateVD to freshly allocated, uninitialised storage (lambda body on the next line).
for (const auto *C : D.getClausesOfKind()) { auto IC = C->varlist_begin(); for (auto F : C->finals()) { auto *OrigVD = cast(cast(*IC)->getDecl()); DeclRefExpr DRE(const_cast(OrigVD), CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); Address OrigAddr = CGF.EmitLValue(&DRE).getAddress(); CodeGenFunction::OMPPrivateScope VarScope(CGF); VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); CGF.EmitIgnoredExpr(F); ++IC; } } } static void emitAlignedClause(CodeGenFunction &CGF, const OMPExecutableDirective &D) { if (!CGF.HaveInsertPoint()) return; for (const auto *Clause : D.getClausesOfKind()) { unsigned ClauseAlignment = 0; if (auto AlignmentExpr = Clause->getAlignment()) { auto AlignmentCI = cast(CGF.EmitScalarExpr(AlignmentExpr)); ClauseAlignment = static_cast(AlignmentCI->getZExtValue()); } for (auto E : Clause->varlists()) { unsigned Alignment = ClauseAlignment; if (Alignment == 0) { // OpenMP [2.8.1, Description] // If no optional parameter is specified, implementation-defined default // alignments for SIMD instructions on the target platforms are assumed. Alignment = CGF.getContext() .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( E->getType()->getPointeeType())) .getQuantity(); } assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); CGF.EmitAlignmentAssumption(PtrValue, Alignment); } } } } static void emitPrivateLoopCounters(CodeGenFunction &CGF, CodeGenFunction::OMPPrivateScope &LoopScope, ArrayRef Counters, ArrayRef PrivateCounters) { if (!CGF.HaveInsertPoint()) return; auto I = PrivateCounters.begin(); for (auto *E : Counters) { auto *VD = cast(cast(E)->getDecl()); auto *PrivateVD = cast(cast(*I)->getDecl()); Address Addr = Address::invalid(); (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { // Emit var without initialization. 
// This physical line holds the end of emitPrivateLoopCounters, all of the static helper emitPreCond, and the start of the static helper emitPrivateLinearVars.
// End of emitPrivateLoopCounters: the private counter is allocated (no initialisation), its cleanups are registered, and the resulting address is stored in Addr; the original counter VD is then registered to that same Addr, so both declarations resolve to one storage location. The second lambda reads Addr by reference, so it relies on the first lambda having already run - presumably addPrivate invokes its callback immediately; verify against OMPPrivateScope.
// emitPreCond(CGF, S, Cond, TrueBlock, FalseBlock, TrueCount): does nothing without an insert point. Within a temporary private scope it privatises the loop counters and emits the counters' init expressions, then (outside that scope) emits the branch on Cond to TrueBlock/FalseBlock with TrueCount as the profile weight.
// emitPrivateLinearVars(CGF, D, PrivateScope): does nothing without an insert point. For each variable of each matching clause, paired with its private copy, it registers VD to the address of PrivateVD after emitting it with EmitVarDecl; registration is asserted to succeed.
auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); CGF.EmitAutoVarCleanups(VarEmission); Addr = VarEmission.getAllocatedAddress(); return Addr; }); (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; }); ++I; } } static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { if (!CGF.HaveInsertPoint()) return; { CodeGenFunction::OMPPrivateScope PreCondScope(CGF); emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), S.private_counters()); (void)PreCondScope.Privatize(); // Get initial values of real counters. for (auto I : S.inits()) { CGF.EmitIgnoredExpr(I); } } // Check that loop is executed at least one time. CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); } static void emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!CGF.HaveInsertPoint()) return; for (const auto *C : D.getClausesOfKind()) { auto CurPrivate = C->privates().begin(); for (auto *E : C->varlists()) { auto *VD = cast(cast(E)->getDecl()); auto *PrivateVD = cast(cast(*CurPrivate)->getDecl()); bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { // Emit private VarDecl with copy init. CGF.EmitVarDecl(*PrivateVD); return CGF.GetAddrOfLocalVar(PrivateVD); }); assert(IsRegistered && "linear var already registered as private"); // Silence the warning about unused variable. 
// This physical line holds the end of emitPrivateLinearVars, all of the static helper emitSimdlenSafelenClause, and the start of CodeGenFunction::EmitOMPSimdInit.
// emitSimdlenSafelenClause(CGF, D, IsMonotonic): does nothing without an insert point. It checks two clause kinds in priority order.
//  - First kind (accessor getSimdlen): the length must fold to a constant integer (enforced by `cast`); it becomes the vectorize width. Unless IsMonotonic is set, the loop is marked parallel only when the second getSingleClause() lookup finds nothing.
//  - Otherwise, second kind (accessor getSafelen): the length likewise becomes the vectorize width and the loop is unconditionally marked non-parallel, because a finite safelen permits loop-carried dependences.
// NOTE(review): the explanatory comment about 'safelen' is duplicated in both branches; in the first branch it presumably explains the !getSingleClause() test - confirm the missing template argument is the safelen clause.
// EmitOMPSimdInit(D, IsMonotonic): body is on the next line.
(void)IsRegistered; ++CurPrivate; } } } static void emitSimdlenSafelenClause(CodeGenFunction &CGF, const OMPExecutableDirective &D, bool IsMonotonic) { if (!CGF.HaveInsertPoint()) return; if (const auto *C = D.getSingleClause()) { RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), /*ignoreResult=*/true); llvm::ConstantInt *Val = cast(Len.getScalarVal()); CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. if (!IsMonotonic) CGF.LoopStack.setParallel(!D.getSingleClause()); } else if (const auto *C = D.getSingleClause()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); llvm::ConstantInt *Val = cast(Len.getScalarVal()); CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. CGF.LoopStack.setParallel(false); } } void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic) { // Walk clauses and process safelen/lastprivate. 
// This physical line holds the body of CodeGenFunction::EmitOMPSimdInit, all of CodeGenFunction::EmitOMPSimdFinal, and the start of CodeGenFunction::EmitOMPSimdDirective.
// EmitOMPSimdInit: marks the loop parallel unless IsMonotonic, enables vectorization, then lets emitSimdlenSafelenClause refine the width/parallel flags.
// NOTE(review): the comment preceding this body mentions processing "lastprivate", but nothing here touches lastprivate clauses - the comment appears outdated.
// EmitOMPSimdFinal(D): does nothing without an insert point. For each `finals()` expression paired with a loop counter, if the counter's declaration is present in LocalDeclMap or known to CapturedStmtInfo, the original address is computed through a DeclRefExpr, the variable is remapped to it in a local private scope, and the final-value expression is emitted. Counters that are neither local nor captured are skipped. Finally emitLinearClauseFinal is called.
// EmitOMPSimdDirective(S): the region lambda first handles the pre-condition. If it constant-folds to false nothing is emitted; if it does not fold, blocks "simd.if.then"/"simd.if.end" are created and emitPreCond emits the guard. It then emits the iteration variable and its init (continues on the next line).
LoopStack.setParallel(!IsMonotonic); LoopStack.setVectorizeEnable(true); emitSimdlenSafelenClause(*this, D, IsMonotonic); } void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { if (!HaveInsertPoint()) return; auto IC = D.counters().begin(); for (auto F : D.finals()) { auto *OrigVD = cast(cast((*IC))->getDecl()); if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { DeclRefExpr DRE(const_cast(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); Address OrigAddr = EmitLValue(&DRE).getAddress(); OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); EmitIgnoredExpr(F); } ++IC; } emitLinearClauseFinal(*this, D); } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF) { // if (PreCond) { // for (IV in 0..LastIteration) BODY; // ; // } // // Emit: if (PreCond) - begin. // If the condition constant folds and can be elided, avoid emitting the // whole loop. bool CondConstant; llvm::BasicBlock *ContBlock = nullptr; if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { if (!CondConstant) return; } else { auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); ContBlock = CGF.createBasicBlock("simd.if.end"); emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, CGF.getProfileCount(&S)); CGF.EmitBlock(ThenBlock); CGF.incrementProfileCounter(&S); } // Emit the loop iteration variable. const Expr *IVExpr = S.getIterationVariable(); const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); CGF.EmitVarDecl(*IVDecl); CGF.EmitIgnoredExpr(S.getInit()); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on // each iteration (e.g., it is foldable into a constant). 
// This physical line holds the rest of CodeGenFunction::EmitOMPSimdDirective and the start of CodeGenFunction::EmitOMPForOuterLoop.
// EmitOMPSimdDirective (continued): if the last-iteration expression is a declaration reference, its variable is emitted and the iteration-count calculation is emitted once. Then, in order: EmitOMPSimdInit, emitAlignedClause, EmitOMPLinearClauseInit. Inside a private LoopScope: loop counters, linear variables, private, reduction init and lastprivate init are registered and privatised; the inner loop is emitted with a body that calls EmitOMPLoopBody with a default-constructed JumpDest followed by a stop point, and with an empty post-increment hook; lastprivate final (only when a lastprivate clause was seen) and reduction final follow. After the scope, EmitOMPSimdFinal runs, and when a pre-condition guard was emitted its continuation block is branched to and emitted. The lambda is run through the runtime's emitInlinedDirective with OMPD_simd.
// NOTE(review): EmitOMPSimdInit and EmitOMPLastprivateClauseFinal are each called here with one argument fewer than their definitions in this file take - presumably defaults are declared in the header; confirm.
// EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LB, UB, ST, IL, Chunk): DynamicOrOrdered is true for ordered loops or when the runtime classifies the schedule as dynamic. It asserts that a non-ordered static non-chunked schedule never reaches this function.
if (auto LIExpr = dyn_cast(S.getLastIteration())) { CGF.EmitVarDecl(*cast(LIExpr->getDecl())); // Emit calculation of the iterations count. CGF.EmitIgnoredExpr(S.getCalcLastIteration()); } CGF.EmitOMPSimdInit(S); emitAlignedClause(CGF, S); CGF.EmitOMPLinearClauseInit(S); bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); emitPrivateLoopCounters(CGF, LoopScope, S.counters(), S.private_counters()); emitPrivateLinearVars(CGF, S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S, JumpDest()); CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); // Emit final copy of the lastprivate variables at the end of loops. if (HasLastprivateClause) { CGF.EmitOMPLastprivateClauseFinal(S); } CGF.EmitOMPReductionClauseFinal(S); } CGF.EmitOMPSimdFinal(S); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } void CodeGenFunction::EmitOMPForOuterLoop( OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); assert((Ordered || !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. 
// This physical line lies entirely inside the explanatory comment of CodeGenFunction::EmitOMPForOuterLoop: it quotes the OpenMP description of the dynamic, guided, auto and runtime schedules and begins the pseudo-code of the dispatch loop the function generates (a `while` over __kmpc_dispatch_next wrapping the inner iteration loop, with a per-iteration dispatch-fini call for ordered loops only).
// // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When schedule(dynamic,chunk_size) is specified, the iterations are // distributed to threads in the team in chunks as the threads request them. // Each thread executes a chunk of iterations, then requests another chunk, // until no chunks remain to be distributed. Each chunk contains chunk_size // iterations, except for the last chunk to be distributed, which may have // fewer iterations. When no chunk_size is specified, it defaults to 1. // // When schedule(guided,chunk_size) is specified, the iterations are assigned // to threads in the team in chunks as the executing threads request them. // Each thread executes a chunk of iterations, then requests another chunk, // until no chunks remain to be assigned. For a chunk_size of 1, the size of // each chunk is proportional to the number of unassigned iterations divided // by the number of threads in the team, decreasing to 1. For a chunk_size // with value k (greater than 1), the size of each chunk is determined in the // same way, with the restriction that the chunks do not contain fewer than k // iterations (except for the last chunk to be assigned, which may have fewer // than k iterations). // // When schedule(auto) is specified, the decision regarding scheduling is // delegated to the compiler and/or runtime system. The programmer gives the // implementation the freedom to choose any possible mapping of iterations to // threads in the team. // // When schedule(runtime) is specified, the decision regarding scheduling is // deferred until run time, and the schedule and chunk size are taken from the // run-sched-var ICV. If the ICV is set to auto, the schedule is // implementation defined // // while(__kmpc_dispatch_next(&LB, &UB)) { // idx = LB; // while (idx <= UB) { BODY; ++idx; // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 
// This physical line continues CodeGenFunction::EmitOMPForOuterLoop: the end of the explanatory comment (static chunked schedule pseudo-code) followed by the loop set-up.
// IVSize/IVSigned are taken from the iteration variable's type. For dynamic or ordered loops the runtime's dispatch-init is emitted with the value of the last-iteration expression as upper bound; otherwise the static-init call is emitted with IL, LB, UB, ST and Chunk.
// The outer loop begins with block "omp.dispatch.cond" (pushed on LoopStack) and exits to "omp.dispatch.end".
// Loop condition: for the static case the code emits "UB = min(UB, GlobalUB)" (getEnsureUpperBound), "IV = LB" (getInit) and evaluates S.getCond(); for the dynamic/ordered case the condition is the result of the runtime's emitForNext call, which also receives IL, LB, UB and ST.
// } // inner loop // } // // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When schedule(static, chunk_size) is specified, iterations are divided into // chunks of size chunk_size, and the chunks are assigned to the threads in // the team in a round-robin fashion in the order of the thread number. // // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { // while (idx <= UB) { BODY; ++idx; } // inner loop // LB = LB + ST; // UB = UB + ST; // } // const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, UBVal, Chunk); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); } auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); LoopStack.push(CondBlock); llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { // UB = min(UB, GlobalUB) EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB EmitIgnoredExpr(S.getInit()); // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. 
auto ExitBlock = LoopExit.getBlock(); if (LoopScope.requiresCleanups()) ExitBlock = createBasicBlock("omp.dispatch.cleanup"); auto LoopBody = createBasicBlock("omp.dispatch.body"); Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } EmitBlock(LoopBody); // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) EmitIgnoredExpr(S.getInit()); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); // Generate !llvm.loop.parallel metadata for loads and stores for loops // with dynamic/guided scheduling and without ordered clause. if (!isOpenMPSimdDirective(S.getDirectiveKind())) LoopStack.setParallel(!IsMonotonic); else EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S, LoopExit](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); }, [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { if (Ordered) { CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( CGF, Loc, IVSize, IVSigned); } }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". EmitIgnoredExpr(S.getNextLowerBound()); EmitIgnoredExpr(S.getNextUpperBound()); } EmitBranch(CondBlock); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. if (!DynamicOrOrdered) RT.emitForStaticFinish(*this, S.getLocEnd()); } /// \brief Emit a helper variable and return corresponding lvalue. 
static LValue EmitOMPHelperVar(CodeGenFunction &CGF, const DeclRefExpr *Helper) { auto VDecl = cast(Helper->getDecl()); CGF.EmitVarDecl(*VDecl); return CGF.EmitLValue(Helper); } namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; OpenMPScheduleClauseModifier M1; OpenMPScheduleClauseModifier M2; ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2) : Kind(Kind), M1(M1), M2(M2) {} }; } // namespace static std::pair emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, bool OuterRegion) { // Detect the loop schedule kind and chunk. auto ScheduleKind = OMPC_SCHEDULE_unknown; OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; llvm::Value *Chunk = nullptr; if (const auto *C = S.getSingleClause()) { ScheduleKind = C->getScheduleKind(); M1 = C->getFirstScheduleModifier(); M2 = C->getSecondScheduleModifier(); if (const auto *Ch = C->getChunkSize()) { if (auto *ImpRef = cast_or_null(C->getHelperChunkSize())) { if (OuterRegion) { const VarDecl *ImpVar = cast(ImpRef->getDecl()); CGF.EmitVarDecl(*ImpVar); CGF.EmitStoreThroughLValue( CGF.EmitAnyExpr(Ch), CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), ImpVar->getType())); } else { Ch = ImpRef; } } if (!C->getHelperChunkSize() || !OuterRegion) { Chunk = CGF.EmitScalarExpr(Ch); Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), S.getIterationVariable()->getType(), S.getLocStart()); } } } return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2)); } bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); EmitVarDecl(*IVDecl); // Emit the iterations count variable. 
// If it is not a variable, Sema decided to calculate iterations count on each // iteration (e.g., it is foldable into a constant). if (auto LIExpr = dyn_cast(S.getLastIteration())) { EmitVarDecl(*cast(LIExpr->getDecl())); // Emit calculation of the iterations count. EmitIgnoredExpr(S.getCalcLastIteration()); } auto &RT = CGM.getOpenMPRuntime(); bool HasLastprivateClause; // Check pre-condition. { // Skip the entire loop if we don't meet the precondition. // If the condition constant folds and can be elided, avoid emitting the // whole loop. bool CondConstant; llvm::BasicBlock *ContBlock = nullptr; if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { if (!CondConstant) return false; } else { auto *ThenBlock = createBasicBlock("omp.precond.then"); ContBlock = createBasicBlock("omp.precond.end"); emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, getProfileCount(&S)); EmitBlock(ThenBlock); incrementProfileCounter(&S); } emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); // Emit 'then' code. { // Emit helper vars inits. LValue LB = EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); LValue UB = EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast(S.getStrideVariable())); LValue IL = EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. 
CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); emitPrivateLoopCounters(*this, LoopScope, S.counters(), S.private_counters()); emitPrivateLinearVars(*this, S, LoopScope); (void)LoopScope.Privatize(); // Detect the loop schedule kind and chunk. llvm::Value *Chunk; OpenMPScheduleClauseKind ScheduleKind; auto ScheduleInfo = emitScheduleClause(*this, S, /*OuterRegion=*/false); Chunk = ScheduleInfo.first; ScheduleKind = ScheduleInfo.second.Kind; const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2; const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); const bool Ordered = S.getSingleClause() != nullptr; // OpenMP 4.5, 2.7.1 Loop Construct, Description. // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && !Ordered) { if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When no chunk_size is specified, the iteration space is divided into // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. 
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S, LoopExit](CodeGenFunction &CGF) { CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. RT.emitForStaticFinish(*this, S.getLocStart()); } else { const bool IsMonotonic = Ordered || ScheduleKind == OMPC_SCHEDULE_static || ScheduleKind == OMPC_SCHEDULE_unknown || M1 == OMPC_SCHEDULE_MODIFIER_monotonic || M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), Chunk); } EmitOMPReductionClauseFinal(S); // Emit final copy of the lastprivate variables if IsLastIter != 0. if (HasLastprivateClause) EmitOMPLastprivateClauseFinal( S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S); } // We're now done with the loop, so jump to the continuation block. 
if (ContBlock) { EmitBranch(ContBlock); EmitBlock(ContBlock, true); } } return HasLastprivateClause; } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); // Emit an implicit barrier at the end. if (!S.getSingleClause() || HasLastprivates) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); } } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); // Emit an implicit barrier at the end. if (!S.getSingleClause() || HasLastprivates) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); } } static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, const Twine &Name, llvm::Value *Init = nullptr) { auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); if (Init) CGF.EmitScalarInit(Init, LVal); return LVal; } OpenMPDirectiveKind CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { auto *Stmt = cast(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast(Stmt); if (CS && CS->size() > 1) { bool HasLastprivates = false; auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) { auto &C = CGF.CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. 
LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", CGF.Builder.getInt32(0)); auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1); LValue UB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", CGF.Builder.getInt32(1)); LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", CGF.Builder.getInt32(0)); // Loop counter. LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, S.getLocStart(), /*fpContractable=*/false); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, S.getLocStart()); auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: // switch (IV) { // case 0: // ; // break; // ... 
// case - 1: // - 1]>; // break; // } // .omp.sections.exit: auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); auto *SwitchStmt = CGF.Builder.CreateSwitch( CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, CS->size()); unsigned CaseNumber = 0; for (auto *SubStmt : CS->children()) { auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); CGF.EmitBlock(CaseBB); SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); CGF.EmitStmt(SubStmt); CGF.EmitBranch(ExitBB); ++CaseNumber; } CGF.EmitBlock(ExitBB, /*IsFinished=*/true); }; CodeGenFunction::OMPPrivateScope LoopScope(CGF); if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); // Emit static non-chunked loop. CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); // UB = min(UB, GlobalUB); auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); auto *MinUBGlobalUB = CGF.Builder.CreateSelect( CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); // IV = LB; CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); // while (idx <= UB) { BODY; ++idx; } CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. 
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); CGF.EmitOMPReductionClauseFinal(S); // Emit final copy of the lastprivate variables if IsLastIter != 0. if (HasLastprivates) CGF.EmitOMPLastprivateClauseFinal( S, CGF.Builder.CreateIsNotNull( CGF.EmitLoadOfScalar(IL, S.getLocStart()))); }; bool HasCancel = false; if (auto *OSD = dyn_cast(&S)) HasCancel = OSD->hasCancel(); else if (auto *OPSD = dyn_cast(&S)) HasCancel = OPSD->hasCancel(); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, HasCancel); // Emit barrier for lastprivates only if 'sections' directive has 'nowait' // clause. Otherwise the barrier will be generated by the codegen for the // directive. if (HasLastprivates && S.getSingleClause()) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown); } return OMPD_sections; } // If only one section is found - no need to generate loop, emit as a single // region. bool HasFirstprivates; // No need to generate reductions for sections with single section region, we // can use original shared variables for all operations. bool HasReductions = S.hasClausesOfKind(); // No need to generate lastprivates for sections with single section region, // we can use original shared variable for all calculations with barrier at // the end of the sections. 
bool HasLastprivates = S.hasClausesOfKind(); auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) { CodeGenFunction::OMPPrivateScope SingleScope(CGF); HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); CGF.EmitStmt(Stmt); }; CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); // Emit barrier for firstprivates, lastprivates or reductions only if // 'sections' directive has 'nowait' clause. Otherwise the barrier will be // generated by the codegen for the directive. if ((HasFirstprivates || HasLastprivates || HasReductions) && S.getSingleClause()) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } return OMPD_single; } void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); OpenMPDirectiveKind EmittedAs = EmitSections(S); // Emit an implicit barrier at the end. if (!S.getSingleClause()) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); } } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector CopyprivateVars; llvm::SmallVector DestExprs; llvm::SmallVector SrcExprs; llvm::SmallVector AssignmentOps; // Check if there are any 'copyprivate' clauses associated with this // 'single' // construct. 
// Build a list of copyprivate variables along with helper expressions // (, , = expressions) for (const auto *C : S.getClausesOfKind()) { CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); DestExprs.append(C->destination_exprs().begin(), C->destination_exprs().end()); SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } LexicalScope Scope(*this, S.getSourceRange()); // Emit code for 'single' region along with 'copyprivate' clauses bool HasFirstprivates; auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { CodeGenFunction::OMPPrivateScope SingleScope(CGF); HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); }; CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); // Emit an implicit barrier at the end (to avoid data race on firstprivate // init or if no 'nowait' clause was specified and no 'copyprivate' clause). if ((!S.getSingleClause() || HasFirstprivates) && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), S.getSingleClause() ? 
OMPD_unknown : OMPD_single); } } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); }; CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); }; Expr *Hint = nullptr; if (auto *HintClause = S.getSingleClause()) Hint = HintClause->getHint(); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); } void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. LexicalScope Scope(*this, S.getSourceRange()); (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. LexicalScope Scope(*this, S.getSourceRange()); (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. 
LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { (void)CGF.EmitSections(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. LexicalScope Scope(*this, S.getSourceRange()); auto CS = cast(S.getAssociatedStmt()); auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet EmittedAsPrivate; // Get list of private variables. llvm::SmallVector PrivateVars; llvm::SmallVector PrivateCopies; for (const auto *C : S.getClausesOfKind()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { PrivateVars.push_back(*IRef); PrivateCopies.push_back(IInit); } ++IRef; } } EmittedAsPrivate.clear(); // Get list of firstprivate variables. llvm::SmallVector FirstprivateVars; llvm::SmallVector FirstprivateCopies; llvm::SmallVector FirstprivateInits; for (const auto *C : S.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { FirstprivateVars.push_back(*IRef); FirstprivateCopies.push_back(IInit); FirstprivateInits.push_back(*IElemInitRef); } ++IRef, ++IElemInitRef; } } // Build list of dependences. 
llvm::SmallVector, 8> Dependences; for (const auto *C : S.getClausesOfKind()) { for (auto *IRef : C->varlists()) { Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); } } auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( CodeGenFunction &CGF) { // Set proper addresses for generated private copies. auto *CS = cast(S.getAssociatedStmt()); OMPPrivateScope Scope(CGF); if (!PrivateVars.empty() || !FirstprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); // Map privates. llvm::SmallVector, 16> PrivatePtrs; llvm::SmallVector CallArgs; CallArgs.push_back(PrivatesPtr); for (auto *E : PrivateVars) { auto *VD = cast(cast(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } for (auto *E : FirstprivateVars) { auto *VD = cast(cast(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.EmitRuntimeCall(CopyFn, CallArgs); for (auto &&Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } (void)Scope.Privatize(); if (*PartId) { // TODO: emit code for untied tasks. } CGF.EmitStmt(CS->getCapturedStmt()); }; auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, OMPD_task, CodeGen); // Check if we should emit tied or untied task. 
bool Tied = !S.getSingleClause(); // Check if the task is final llvm::PointerIntPair Final; if (const auto *Clause = S.getSingleClause()) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. auto *Cond = Clause->getCondition(); bool CondConstant; if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) Final.setInt(CondConstant); else Final.setPointer(EvaluateExprAsBool(Cond)); } else { // By default the task is not final. Final.setInt(/*IntVal=*/false); } auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { if (C->getNameModifier() == OMPD_unknown || C->getNameModifier() == OMPD_task) { IfCond = C->getCondition(); break; } } CGM.getOpenMPRuntime().emitTaskCall( *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences); } void CodeGenFunction::EmitOMPTaskyieldDirective( const OMPTaskyieldDirective &S) { CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); } void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); } void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); } void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); }; CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef { if (const auto *FlushClause = S.getSingleClause()) { return 
llvm::makeArrayRef(FlushClause->varlist_begin(), FlushClause->varlist_end()); } return llvm::None; }(), S.getLocStart()); } void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S) { CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); Fn->addFnAttr(llvm::Attribute::NoInline); return Fn; } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (!S.getAssociatedStmt()) return; LexicalScope Scope(*this, S.getSourceRange()); auto *C = S.getSingleClause(); auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { if (C) { auto CS = cast(S.getAssociatedStmt()); llvm::SmallVector CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); } else { CGF.EmitStmt( cast(S.getAssociatedStmt())->getCapturedStmt()); } }; CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, QualType DestType, SourceLocation Loc) { assert(CGF.hasScalarEvaluationKind(DestType) && "DestType must have scalar evaluation kind."); assert(!Val.isAggregate() && "Must be a scalar or complex."); return Val.isScalar() ? 
CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, Loc) : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, DestType, Loc); } static CodeGenFunction::ComplexPairTy convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, QualType DestType, SourceLocation Loc) { assert(CGF.getEvaluationKind(DestType) == TEK_Complex && "DestType must have complex evaluation kind."); CodeGenFunction::ComplexPairTy ComplexVal; if (Val.isScalar()) { // Convert the input element to the element type of the complex. auto DestElementType = DestType->castAs()->getElementType(); auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType, Loc); ComplexVal = CodeGenFunction::ComplexPairTy( ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); } else { assert(Val.isComplex() && "Must be a scalar or complex."); auto SrcElementType = SrcType->castAs()->getElementType(); auto DestElementType = DestType->castAs()->getElementType(); ComplexVal.first = CGF.EmitScalarConversion( Val.getComplexVal().first, SrcElementType, DestElementType, Loc); ComplexVal.second = CGF.EmitScalarConversion( Val.getComplexVal().second, SrcElementType, DestElementType, Loc); } return ComplexVal; } static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, LValue LVal, RValue RVal) { if (LVal.isGlobalReg()) { CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); } else { CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? 
llvm::SequentiallyConsistent : llvm::Monotonic, LVal.isVolatile(), /*IsInit=*/false); } } -static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal, - QualType RValTy, SourceLocation Loc) { - switch (CGF.getEvaluationKind(LVal.getType())) { +void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, + QualType RValTy, SourceLocation Loc) { + switch (getEvaluationKind(LVal.getType())) { case TEK_Scalar: - CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue( - CGF, RVal, RValTy, LVal.getType(), Loc)), - LVal); + EmitStoreThroughLValue(RValue::get(convertToScalarValue( + *this, RVal, RValTy, LVal.getType(), Loc)), + LVal); break; case TEK_Complex: - CGF.EmitStoreOfComplex( - convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal, + EmitStoreOfComplex( + convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, /*isInit=*/false); break; case TEK_Aggregate: llvm_unreachable("Must be a scalar or complex."); } } static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *V, SourceLocation Loc) { // v = x; assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); LValue VLValue = CGF.EmitLValue(V); RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc) : CGF.EmitAtomicLoad(XLValue, Loc, IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic, XLValue.isVolatile()); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a // list. 
if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); - emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc); + CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); } static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *E, SourceLocation Loc) { // x = expr; assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a // list. if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); } static std::pair emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update, BinaryOperatorKind BO, llvm::AtomicOrdering AO, bool IsXLHSInRHSPart) { auto &Context = CGF.CGM.getContext(); // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' // expression is simple and atomic is allowed for the given type for the // target platform. 
if (BO == BO_Comma || !Update.isScalar() || !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || (!isa(Update.getScalarVal()) && (Update.getScalarVal()->getType() != X.getAddress().getElementType())) || !X.getAddress().getElementType()->isIntegerTy() || !Context.getTargetInfo().hasBuiltinAtomic( Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) return std::make_pair(false, RValue::get(nullptr)); llvm::AtomicRMWInst::BinOp RMWOp; switch (BO) { case BO_Add: RMWOp = llvm::AtomicRMWInst::Add; break; case BO_Sub: if (!IsXLHSInRHSPart) return std::make_pair(false, RValue::get(nullptr)); RMWOp = llvm::AtomicRMWInst::Sub; break; case BO_And: RMWOp = llvm::AtomicRMWInst::And; break; case BO_Or: RMWOp = llvm::AtomicRMWInst::Or; break; case BO_Xor: RMWOp = llvm::AtomicRMWInst::Xor; break; case BO_LT: RMWOp = X.getType()->hasSignedIntegerRepresentation() ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min : llvm::AtomicRMWInst::Max) : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin : llvm::AtomicRMWInst::UMax); break; case BO_GT: RMWOp = X.getType()->hasSignedIntegerRepresentation() ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max : llvm::AtomicRMWInst::Min) : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax : llvm::AtomicRMWInst::UMin); break; case BO_Assign: RMWOp = llvm::AtomicRMWInst::Xchg; break; case BO_Mul: case BO_Div: case BO_Rem: case BO_Shl: case BO_Shr: case BO_LAnd: case BO_LOr: return std::make_pair(false, RValue::get(nullptr)); case BO_PtrMemD: case BO_PtrMemI: case BO_LE: case BO_GE: case BO_EQ: case BO_NE: case BO_AddAssign: case BO_SubAssign: case BO_AndAssign: case BO_OrAssign: case BO_XorAssign: case BO_MulAssign: case BO_DivAssign: case BO_RemAssign: case BO_ShlAssign: case BO_ShrAssign: case BO_Comma: llvm_unreachable("Unsupported atomic update operation"); } auto *UpdateVal = Update.getScalarVal(); if (auto *IC = dyn_cast(UpdateVal)) { UpdateVal = CGF.Builder.CreateIntCast( IC, X.getAddress().getElementType(), X.getType()->hasSignedIntegerRepresentation()); } auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } std::pair CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, llvm::AtomicOrdering AO, SourceLocation Loc, const llvm::function_ref &CommonGen) { // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; // x--, --x -> xrval - 1; // x = x binop expr; -> xrval binop expr // x = expr Op x; - > expr binop xrval; auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); if (!Res.first) { if (X.isGlobalReg()) { // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop // 'xrval'. EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); } else { // Perform compare-and-swap procedure. 
EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); } } return Res; } static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { assert(isa(UE->IgnoreImpCasts()) && "Update expr in 'atomic update' must be a binary operator."); auto *BOUE = cast(UE->IgnoreImpCasts()); // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; // x--, --x -> xrval - 1; // x = x binop expr; -> xrval binop expr // x = expr Op x; - > expr binop xrval; assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; auto *LHS = cast(BOUE->getLHS()->IgnoreImpCasts()); auto *RHS = cast(BOUE->getRHS()->IgnoreImpCasts()); auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; auto Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); return CGF.EmitAnyExpr(UE); }; (void)CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a // list. 
if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); } static RValue convertToType(CodeGenFunction &CGF, RValue Value, QualType SourceType, QualType ResType, SourceLocation Loc) { switch (CGF.getEvaluationKind(ResType)) { case TEK_Scalar: return RValue::get( convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); case TEK_Complex: { auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); return RValue::getComplex(Res.first, Res.second); } case TEK_Aggregate: break; } llvm_unreachable("Must be a scalar or complex."); } static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, bool IsPostfixUpdate, const Expr *V, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); RValue NewVVal; LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. assert(isa(UE->IgnoreImpCasts()) && "Update expr in 'atomic capture' must be a binary operator."); auto *BOUE = cast(UE->IgnoreImpCasts()); // Update expressions are allowed to have the following forms: // x binop= expr; -> xrval + expr; // x++, ++x -> xrval + 1; // x--, --x -> xrval - 1; // x = x binop expr; -> xrval binop expr // x = expr Op x; - > expr binop xrval; auto *LHS = cast(BOUE->getLHS()->IgnoreImpCasts()); auto *RHS = cast(BOUE->getRHS()->IgnoreImpCasts()); auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; NewVValType = XRValExpr->getType(); auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); RValue Res = CGF.EmitAnyExpr(UE); NewVVal = IsPostfixUpdate ? XRValue : Res; return Res; }; auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); if (Res.first) { // 'atomicrmw' instruction was generated. if (IsPostfixUpdate) { // Use old value from 'atomicrmw'. NewVVal = Res.second; } else { // 'atomicrmw' does not provide new value, so evaluate it using old // value of 'x'. CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); NewVVal = CGF.EmitAnyExpr(UE); } } } else { // 'x' is simply rewritten with some 'expr'. NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), X->getType().getNonReferenceType(), Loc); auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { NewVVal = XRValue; return ExprRValue; }; // Try to perform atomicrmw xchg, otherwise simple exchange. auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, Loc, Gen); if (Res.first) { // 'atomicrmw' instruction was generated. NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; } } // Emit post-update store to 'v' of old/new 'x' value. - emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc); + CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a // list. 
if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); } static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, bool IsSeqCst, bool IsPostfixUpdate, const Expr *X, const Expr *V, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { switch (Kind) { case OMPC_read: EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); break; case OMPC_write: EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); break; case OMPC_unknown: case OMPC_update: EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_capture: EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_if: case OMPC_final: case OMPC_num_threads: case OMPC_private: case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: case OMPC_shared: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: case OMPC_flush: case OMPC_proc_bind: case OMPC_schedule: case OMPC_ordered: case OMPC_nowait: case OMPC_untied: case OMPC_threadprivate: case OMPC_depend: case OMPC_mergeable: case OMPC_device: case OMPC_threads: case OMPC_simd: case OMPC_map: case OMPC_num_teams: case OMPC_thread_limit: case OMPC_priority: case OMPC_grainsize: case OMPC_nogroup: case OMPC_num_tasks: case OMPC_hint: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { bool IsSeqCst = S.getSingleClause(); OpenMPClauseKind Kind = OMPC_unknown; for (auto *C : S.clauses()) { // Find first clause (skip seq_cst clause, if it is first). if (C->getClauseKind() != OMPC_seq_cst) { Kind = C->getClauseKind(); break; } } const auto *CS = S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); if (const auto *EWC = dyn_cast(CS)) { enterFullExpression(EWC); } // Processing for statements under 'atomic capture'. 
if (const auto *Compound = dyn_cast(CS)) { for (const auto *C : Compound->body()) { if (const auto *EWC = dyn_cast(C)) { enterFullExpression(EWC); } } } LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); const CapturedStmt &CS = *cast(S.getAssociatedStmt()); llvm::SmallVector CapturedVars; GenerateOpenMPCapturedVars(CS, CapturedVars); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; if (auto *C = S.getSingleClause()) { IfCond = C->getCondition(); } // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; if (auto *C = S.getSingleClause()) { Device = C->getDevice(); } // Check if we have an if clause whose conditional always evaluates to false // or if we do not have any targets specified. If so the target region is not // an offload entry point. bool IsOffloadEntry = true; if (IfCond) { bool Val; if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) IsOffloadEntry = false; } if (CGM.getLangOpts().OMPTargetTriples.empty()) IsOffloadEntry = false; assert(CurFuncDecl && "No parent declaration for target region!"); StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce // the mangling of the device outlined kernel. 
if (auto *D = dyn_cast(CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); else if (auto *D = dyn_cast(CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); else ParentName = CGM.getMangledName(GlobalDecl(cast(CurFuncDecl))); CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry); CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); } void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), S.getCancelRegion()); } void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { if (C->getNameModifier() == OMPD_unknown || C->getNameModifier() == OMPD_cancel) { IfCond = C->getCondition(); break; } } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, S.getCancelRegion()); } CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { if (Kind == OMPD_parallel || Kind == OMPD_task) return ReturnBlock; assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); return BreakContinueStack.back().BreakBlock; } // Generate the instructions for '#pragma omp target data' directive. 
// The three directive emitters below share one shape: they take the statement
// associated with the directive, and pass a callback to the OpenMP runtime's
// emitInlinedDirective() that emits only the captured statement's body.
// NOTE(review): `cast` appears without an explicit template argument in this
// copy of the source; it was presumably lost in extraction (the result is
// used via getCapturedStmt()) -- confirm against the upstream file.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // emit the code inside the construct for now
  auto CS = cast(S.getAssociatedStmt());
  // The callback captures CS by reference; CS is a local of this function.
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_data,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

/// Emit code for a taskloop directive. For now only the body of the
/// associated captured statement is emitted, as an inlined OMPD_taskloop
/// region.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  // emit the code inside the construct for now
  auto CS = cast(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}

/// Emit code for a taskloop simd directive. For now only the body of the
/// associated captured statement is emitted, as an inlined OMPD_taskloop_simd
/// region.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  // emit the code inside the construct for now
  auto CS = cast(S.getAssociatedStmt());
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_taskloop_simd,
      [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
}
Index: vendor/clang/dist/lib/CodeGen/CodeGenFunction.h
===================================================================
--- vendor/clang/dist/lib/CodeGen/CodeGenFunction.h (revision 294603)
+++ vendor/clang/dist/lib/CodeGen/CodeGenFunction.h (revision 294604)
@@ -1,3312 +1,3314 @@
//===-- CodeGenFunction.h - Per-Function state for LLVM CodeGen -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the internal per-function state used for llvm translation.
// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENFUNCTION_H #define LLVM_CLANG_LIB_CODEGEN_CODEGENFUNCTION_H #include "CGBuilder.h" #include "CGDebugInfo.h" #include "CGLoopInfo.h" #include "CGValue.h" #include "CodeGenModule.h" #include "CodeGenPGO.h" #include "EHScopeStack.h" #include "clang/AST/CharUnits.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "clang/Basic/CapturedStmt.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" namespace llvm { class BasicBlock; class LLVMContext; class MDNode; class Module; class SwitchInst; class Twine; class Value; class CallSite; } namespace clang { class ASTContext; class BlockDecl; class CXXDestructorDecl; class CXXForRangeStmt; class CXXTryStmt; class Decl; class LabelDecl; class EnumConstantDecl; class FunctionDecl; class FunctionProtoType; class LabelStmt; class ObjCContainerDecl; class ObjCInterfaceDecl; class ObjCIvarDecl; class ObjCMethodDecl; class ObjCImplementationDecl; class ObjCPropertyImplDecl; class TargetInfo; class TargetCodeGenInfo; class VarDecl; class ObjCForCollectionStmt; class ObjCAtTryStmt; class ObjCAtThrowStmt; class ObjCAtSynchronizedStmt; class ObjCAutoreleasePoolStmt; namespace CodeGen { class CodeGenTypes; class CGFunctionInfo; class CGRecordLayout; class CGBlockInfo; class CGCXXABI; class BlockByrefHelpers; class BlockByrefInfo; class BlockFlags; class BlockFieldFlags; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or /// CGExprAgg? /// /// TODO: should vectors maybe be split out into their own thing? 
enum TypeEvaluationKind { TEK_Scalar, TEK_Complex, TEK_Aggregate }; /// CodeGenFunction - This class organizes the per-function state that is used /// while generating LLVM code. class CodeGenFunction : public CodeGenTypeCache { CodeGenFunction(const CodeGenFunction &) = delete; void operator=(const CodeGenFunction &) = delete; friend class CGCXXABI; public: /// A jump destination is an abstract label, branching to which may /// require a jump out through normal cleanups. struct JumpDest { JumpDest() : Block(nullptr), ScopeDepth(), Index(0) {} JumpDest(llvm::BasicBlock *Block, EHScopeStack::stable_iterator Depth, unsigned Index) : Block(Block), ScopeDepth(Depth), Index(Index) {} bool isValid() const { return Block != nullptr; } llvm::BasicBlock *getBlock() const { return Block; } EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; } unsigned getDestIndex() const { return Index; } // This should be used cautiously. void setScopeDepth(EHScopeStack::stable_iterator depth) { ScopeDepth = depth; } private: llvm::BasicBlock *Block; EHScopeStack::stable_iterator ScopeDepth; unsigned Index; }; CodeGenModule &CGM; // Per-module state. const TargetInfo &Target; typedef std::pair ComplexPairTy; LoopInfoStack LoopStack; CGBuilderTy Builder; /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt) const; /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. const Decl *CurFuncDecl; /// CurCodeDecl - This is the inner-most code context, which includes blocks. const Decl *CurCodeDecl; const CGFunctionInfo *CurFnInfo; QualType FnRetTy; llvm::Function *CurFn; /// CurGD - The GlobalDecl for the current function being compiled. GlobalDecl CurGD; /// PrologueCleanupDepth - The cleanup depth enclosing all the /// cleanups associated with the parameters. 
EHScopeStack::stable_iterator PrologueCleanupDepth;

/// ReturnBlock - Unified return block.
JumpDest ReturnBlock;

/// ReturnValue - The temporary alloca to hold the return
/// value. This is invalid iff the function has no return value.
Address ReturnValue;

/// AllocaInsertPoint - This is an instruction in the entry block before which
/// we prefer to insert allocas.
// NOTE(review): llvm::AssertingVH appears without a template argument in this
// copy of the source; presumably lost in extraction -- confirm upstream.
llvm::AssertingVH AllocaInsertPt;

/// \brief API for captured statement code generation.
class CGCapturedStmtInfo {
public:
  /// Creates an info object of kind \p K with no context value and no
  /// recorded captures.
  explicit CGCapturedStmtInfo(CapturedRegionKind K = CR_Default)
      : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {}
  /// Creates an info object for \p S, recording which field of the captured
  /// record corresponds to each capture.
  explicit CGCapturedStmtInfo(const CapturedStmt &S,
                              CapturedRegionKind K = CR_Default)
      : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {

    // Captures and record fields are walked in lockstep: the i-th capture
    // is paired with the i-th field of the captured record.
    RecordDecl::field_iterator Field =
        S.getCapturedRecordDecl()->field_begin();
    for (CapturedStmt::const_capture_iterator I = S.capture_begin(),
                                              E = S.capture_end();
         I != E; ++I, ++Field) {
      if (I->capturesThis())
        CXXThisFieldDecl = *Field;
      else if (I->capturesVariable())
        CaptureFields[I->getCapturedVar()] = *Field;
      // Captures that are neither 'this' nor a variable are not recorded.
    }
  }

  virtual ~CGCapturedStmtInfo();

  CapturedRegionKind getKind() const { return Kind; }

  /// \brief Set the value of the context parameter.
  virtual void setContextValue(llvm::Value *V) { ThisValue = V; }
  /// \brief Retrieve the value of the context parameter.
  virtual llvm::Value *getContextValue() const { return ThisValue; }

  /// \brief Lookup the captured field decl for a variable.
  virtual const FieldDecl *lookup(const VarDecl *VD) const {
    return CaptureFields.lookup(VD);
  }

  /// True if a field was recorded for a captured 'this'.
  bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
  virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }

  /// Always true: every CGCapturedStmtInfo is an instance of this base class.
  static bool classof(const CGCapturedStmtInfo *) {
    return true;
  }

  /// \brief Emit the captured statement body.
  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) {
    CGF.incrementProfileCounter(S);
    CGF.EmitStmt(S);
  }

  /// \brief Get the name of the capture helper.
  virtual StringRef getHelperName() const { return "__captured_stmt"; }

private:
  /// \brief The kind of captured statement being generated.
  CapturedRegionKind Kind;

  /// \brief Keep the map between VarDecl and FieldDecl.
  // NOTE(review): template arguments of llvm::SmallDenseMap are missing in
  // this copy of the source -- confirm against upstream.
  llvm::SmallDenseMap CaptureFields;

  /// \brief The base address of the captured record, passed in as the first
  /// argument of the parallel region function.
  llvm::Value *ThisValue;

  /// \brief Captured 'this' type.
  FieldDecl *CXXThisFieldDecl;
};
CGCapturedStmtInfo *CapturedStmtInfo;

/// \brief RAII for correct setting/restoring of CapturedStmtInfo.
class CGCapturedStmtRAII {
private:
  CodeGenFunction &CGF;
  CGCapturedStmtInfo *PrevCapturedStmtInfo;
public:
  /// Remembers CGF's current CapturedStmtInfo and installs
  /// \p NewCapturedStmtInfo in its place.
  CGCapturedStmtRAII(CodeGenFunction &CGF,
                     CGCapturedStmtInfo *NewCapturedStmtInfo)
      : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo) {
    CGF.CapturedStmtInfo = NewCapturedStmtInfo;
  }
  /// Restores the CapturedStmtInfo that was active at construction.
  ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; }
};

/// \brief Sanitizers enabled for this function.
SanitizerSet SanOpts;

/// \brief True if CodeGen currently emits code implementing sanitizer checks.
bool IsSanitizerScope;

/// \brief RAII object to set/unset CodeGenFunction::IsSanitizerScope.
class SanitizerScope {
  CodeGenFunction *CGF;
public:
  SanitizerScope(CodeGenFunction *CGF);
  ~SanitizerScope();
};

/// In C++, whether we are code generating a thunk. This controls whether we
/// should emit cleanups.
bool CurFuncIsThunk;

/// In ARC, whether we should autorelease the return value.
bool AutoreleaseResult;

/// Whether we processed a Microsoft-style asm block during CodeGen. These can
/// potentially set the return value.
bool SawAsmBlock;

/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
bool IsOutlinedSEHHelper;

// NOTE(review): several container types below (llvm::DenseMap,
// llvm::SmallVector, SmallVector) appear without template arguments in this
// copy of the source; presumably lost in extraction -- confirm upstream.
const CodeGen::CGBlockInfo *BlockInfo;
llvm::Value *BlockPointer;

llvm::DenseMap LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField;

/// \brief A mapping from NRVO variables to the flags used to indicate
/// when the NRVO has been applied to this variable.
llvm::DenseMap NRVOFlags;

EHScopeStack EHStack;
llvm::SmallVector LifetimeExtendedCleanupStack;
llvm::SmallVector SEHTryEpilogueStack;

llvm::Instruction *CurrentFuncletPad = nullptr;

/// Header for data within LifetimeExtendedCleanupStack.
struct LifetimeExtendedCleanupHeader {
  /// The size of the following cleanup object.
  unsigned Size;
  /// The kind of cleanup to push: a value from the CleanupKind enumeration.
  CleanupKind Kind;

  /// Returns Size widened to size_t.
  size_t getSize() const { return Size; }
  CleanupKind getKind() const { return Kind; }
};

/// i32s containing the indexes of the cleanup destinations.
llvm::AllocaInst *NormalCleanupDest;

unsigned NextCleanupDestIndex;

/// FirstBlockInfo - The head of a singly-linked-list of block layouts.
CGBlockInfo *FirstBlockInfo;

/// EHResumeBlock - Unified block containing a call to llvm.eh.resume.
llvm::BasicBlock *EHResumeBlock;

/// The exception slot. All landing pads write the current exception pointer
/// into this alloca.
llvm::Value *ExceptionSlot;

/// The selector slot. Under the MandatoryCleanup model, all landing pads
/// write the current selector value into this alloca.
llvm::AllocaInst *EHSelectorSlot;

/// A stack of exception code slots. Entering an __except block pushes a slot
/// on the stack and leaving pops one. The __exception_code() intrinsic loads
/// a value from the top of the stack.
SmallVector SEHCodeSlotStack;

/// Value returned by __exception_info intrinsic.
llvm::Value *SEHInfo = nullptr;

/// Emits a landing pad for the current EH stack.
llvm::BasicBlock *EmitLandingPad();

llvm::BasicBlock *getInvokeDestImpl();

// NOTE(review): the template parameter lists and several template argument
// lists in this span (e.g. after `template`, `DominatingValue`, `isa`,
// `std::tuple`, `llvm::AlignOf`) are missing or garbled in this copy of the
// source; presumably lost in extraction -- confirm against upstream.

/// Saves \p value through DominatingValue's save() hook and returns the
/// saved form.
template
typename DominatingValue::saved_type saveValueInCond(T value) {
  return DominatingValue::save(*this, value);
}

public:
/// ObjCEHValueStack - Stack of Objective-C exception values, used for
/// rethrows.
SmallVector ObjCEHValueStack;

/// A class controlling the emission of a finally block.
class FinallyInfo {
  /// Where the catchall's edge through the cleanup should go.
  JumpDest RethrowDest;

  /// A function to call to enter the catch.
  llvm::Constant *BeginCatchFn;

  /// An i1 variable indicating whether or not the @finally is
  /// running for an exception.
  llvm::AllocaInst *ForEHVar;

  /// An i8* variable into which the exception pointer to rethrow
  /// has been saved.
  llvm::AllocaInst *SavedExnVar;

public:
  void enter(CodeGenFunction &CGF, const Stmt *Finally,
             llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn,
             llvm::Constant *rethrowFn);
  void exit(CodeGenFunction &CGF);
};

/// Returns true inside SEH __try blocks.
bool isSEHTryScope() const { return !SEHTryEpilogueStack.empty(); }

/// Returns true while emitting a cleanuppad.
bool isCleanupPadScope() const {
  return CurrentFuncletPad && isa(CurrentFuncletPad);
}

/// pushFullExprCleanup - Push a cleanup to be run at the end of the
/// current full-expression.  Safe against the possibility that
/// we're currently inside a conditionally-evaluated expression.
template
void pushFullExprCleanup(CleanupKind kind, As... A) {
  // If we're not in a conditional branch, or if none of the
  // arguments requires saving, then use the unconditional cleanup.
  if (!isInConditionalBranch())
    return EHStack.pushCleanup(kind, A...);

  // Stash values in a tuple so we can guarantee the order of saves.
  typedef std::tuple::saved_type...> SavedTuple;
  SavedTuple Saved{saveValueInCond(A)...};

  typedef EHScopeStack::ConditionalCleanup CleanupType;
  EHStack.pushCleanupTuple(kind, Saved);
  // Finish setting up the cleanup just pushed as a conditional one.
  initFullExprCleanup();
}

/// \brief Queue a cleanup to be pushed after finishing the current
/// full-expression.
template
void pushCleanupAfterFullExpr(CleanupKind Kind, As... A) {
  assert(!isInConditionalBranch() && "can't defer conditional cleanup");

  LifetimeExtendedCleanupHeader Header = { sizeof(T), Kind };

  // Grow the stack by one header plus one T, then construct both in place
  // in the newly added storage.
  size_t OldSize = LifetimeExtendedCleanupStack.size();
  LifetimeExtendedCleanupStack.resize(
      LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size);

  static_assert(sizeof(Header) % llvm::AlignOf::Alignment == 0,
                "Cleanup will be allocated on misaligned address");
  char *Buffer = &LifetimeExtendedCleanupStack[OldSize];
  new (Buffer) LifetimeExtendedCleanupHeader(Header);
  new (Buffer + sizeof(Header)) T(A...);
}

/// Set up the last cleanup that was pushed as a conditional
/// full-expression cleanup.
void initFullExprCleanup();

/// PushDestructorCleanup - Push a cleanup to call the
/// complete-object destructor of an object of the given type at the
/// given address.  Does nothing if T is not a C++ class type with a
/// non-trivial destructor.
void PushDestructorCleanup(QualType T, Address Addr);

/// PushDestructorCleanup - Push a cleanup to call the
/// complete-object variant of the given destructor on the object at
/// the given address.
void PushDestructorCleanup(const CXXDestructorDecl *Dtor, Address Addr);

/// PopCleanupBlock - Will pop the cleanup entry on the stack and
/// process all branch fixups.
void PopCleanupBlock(bool FallThroughIsBranchThrough = false);

/// DeactivateCleanupBlock - Deactivates the given cleanup block.
/// The block cannot be reactivated.  Pops it if it's the top of the
/// stack.
///
/// \param DominatingIP - An instruction which is known to
///   dominate the current IP (if set) and which lies along
///   all paths of execution between the current IP and
///   the point at which the cleanup comes into scope.
void DeactivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
                            llvm::Instruction *DominatingIP);

/// ActivateCleanupBlock - Activates an initially-inactive cleanup.
/// Cannot be used to resurrect a deactivated cleanup.
///
/// \param DominatingIP - An instruction which is known to
///   dominate the current IP (if set) and which lies along
///   all paths of execution between the current IP and
///   the point at which the cleanup comes into scope.
void ActivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
                          llvm::Instruction *DominatingIP);

/// \brief Enters a new scope for capturing cleanups, all of which
/// will be executed once the scope is exited.
class RunCleanupsScope {
  // State captured at construction and handed back to PopCleanupBlocks /
  // DidCallStackSave on exit.
  EHScopeStack::stable_iterator CleanupStackDepth;
  size_t LifetimeExtendedCleanupStackSize;
  bool OldDidCallStackSave;
protected:
  /// True until ForceCleanup() has run; guards against popping twice.
  bool PerformCleanup;
private:

  RunCleanupsScope(const RunCleanupsScope &) = delete;
  void operator=(const RunCleanupsScope &) = delete;

protected:
  CodeGenFunction& CGF;

public:
  /// \brief Enter a new cleanup scope.
  explicit RunCleanupsScope(CodeGenFunction &CGF)
    : PerformCleanup(true), CGF(CGF)
  {
    CleanupStackDepth = CGF.EHStack.stable_begin();
    LifetimeExtendedCleanupStackSize =
        CGF.LifetimeExtendedCleanupStack.size();
    // Save the current flag and reset it for the duration of this scope.
    OldDidCallStackSave = CGF.DidCallStackSave;
    CGF.DidCallStackSave = false;
  }

  /// \brief Exit this cleanup scope, emitting any accumulated
  /// cleanups.
  ~RunCleanupsScope() {
    if (PerformCleanup) {
      CGF.DidCallStackSave = OldDidCallStackSave;
      CGF.PopCleanupBlocks(CleanupStackDepth,
                           LifetimeExtendedCleanupStackSize);
    }
  }

  /// \brief Determine whether this scope requires any cleanups.
  bool requiresCleanups() const {
    return CGF.EHStack.stable_begin() != CleanupStackDepth;
  }

  /// \brief Force the emission of cleanups now, instead of waiting
  /// until this object is destroyed.
  void ForceCleanup() {
    assert(PerformCleanup && "Already forced cleanup");
    CGF.DidCallStackSave = OldDidCallStackSave;
    CGF.PopCleanupBlocks(CleanupStackDepth,
                         LifetimeExtendedCleanupStackSize);
    PerformCleanup = false;
  }
};

/// A RunCleanupsScope that additionally registers itself as
/// CGF.CurLexicalScope, emits lexical-block start/end through the debug info
/// (when available), and keeps a list of labels added via addLabel().
// NOTE(review): SmallVector appears without template arguments in this copy
// of the source; presumably lost in extraction -- confirm upstream.
class LexicalScope : public RunCleanupsScope {
  SourceRange Range;
  SmallVector Labels;
  LexicalScope *ParentScope;

  LexicalScope(const LexicalScope &) = delete;
  void operator=(const LexicalScope &) = delete;

public:
  /// \brief Enter a new cleanup scope.
  explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
    : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
    CGF.CurLexicalScope = this;
    if (CGDebugInfo *DI = CGF.getDebugInfo())
      DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
  }

  /// Records \p label in this scope; the scope must not have been cleaned
  /// up yet.
  void addLabel(const LabelDecl *label) {
    assert(PerformCleanup && "adding label to dead scope?");
    Labels.push_back(label);
  }

  /// \brief Exit this cleanup scope, emitting any accumulated
  /// cleanups.
  ~LexicalScope() {
    if (CGDebugInfo *DI = CGF.getDebugInfo())
      DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd());

    // If we should perform a cleanup, force them now.  Note that
    // this ends the cleanup scope before rescoping any labels.
    if (PerformCleanup) {
      ApplyDebugLocation DL(CGF, Range.getEnd());
      ForceCleanup();
    }
  }

  /// \brief Force the emission of cleanups now, instead of waiting
  /// until this object is destroyed.
  void ForceCleanup() {
    // Restore the parent as the current lexical scope before popping.
    CGF.CurLexicalScope = ParentScope;
    RunCleanupsScope::ForceCleanup();

    if (!Labels.empty())
      rescopeLabels();
  }

  void rescopeLabels();
};

// NOTE(review): llvm::DenseMap appears without template arguments in this
// copy of the source; presumably lost in extraction -- confirm upstream.
typedef llvm::DenseMap DeclMapTy;

/// \brief The scope used to remap some variables as private in the OpenMP
/// loop body (or other captured region emitted without outlining), and to
/// restore old vars back on exit.
class OMPPrivateScope : public RunCleanupsScope {
  /// Entries of CGF.LocalDeclMap as they were before privatization; an
  /// invalid address marks a variable that had no entry.
  DeclMapTy SavedLocals;
  /// Private addresses generated by addPrivate() and not yet installed by
  /// Privatize().
  DeclMapTy SavedPrivates;

private:
  OMPPrivateScope(const OMPPrivateScope &) = delete;
  void operator=(const OMPPrivateScope &) = delete;

public:
  /// \brief Enter a new OpenMP private scope.
  explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}

  /// \brief Registers \a LocalVD variable as a private and apply \a
  /// PrivateGen function for it to generate corresponding private variable.
  /// \a PrivateGen returns an address of the generated private variable.
  /// \return true if the variable is registered as private, false if it has
  /// been privatized already.
  // NOTE(review): llvm::function_ref appears without its signature argument
  // in this copy of the source; presumably lost in extraction.
  bool
  addPrivate(const VarDecl *LocalVD,
             llvm::function_ref PrivateGen) {
    assert(PerformCleanup && "adding private to dead scope");

    // Only save it once.
    if (SavedLocals.count(LocalVD)) return false;

    // Copy the existing local entry to SavedLocals.
    auto it = CGF.LocalDeclMap.find(LocalVD);
    if (it != CGF.LocalDeclMap.end()) {
      SavedLocals.insert({LocalVD, it->second});
    } else {
      // No previous entry: remember that with an invalid address so the
      // entry is erased again on cleanup.
      SavedLocals.insert({LocalVD, Address::invalid()});
    }

    // Generate the private entry.
    Address Addr = PrivateGen();
    QualType VarTy = LocalVD->getType();
    if (VarTy->isReferenceType()) {
      // For a reference-typed variable, store the generated pointer into a
      // fresh temporary and use the temporary as the variable's address.
      Address Temp = CGF.CreateMemTemp(VarTy);
      CGF.Builder.CreateStore(Addr.getPointer(), Temp);
      Addr = Temp;
    }
    SavedPrivates.insert({LocalVD, Addr});

    return true;
  }

  /// \brief Privatizes local variables previously registered as private.
  /// Registration is separate from the actual privatization to allow
  /// initializers use values of the original variables, not the private one.
  /// This is important, for example, if the private variable is a class
  /// variable initialized by a constructor that references other private
  /// variables. But at initialization original variables must be used, not
  /// private copies.
  /// \return true if at least one variable was privatized, false otherwise.
  bool Privatize() {
    copyInto(SavedPrivates, CGF.LocalDeclMap);
    SavedPrivates.clear();
    // SavedLocals is non-empty once any variable has been registered.
    return !SavedLocals.empty();
  }

  /// Runs the base-class cleanup, then restores the saved CGF.LocalDeclMap
  /// entries and forgets them.
  void ForceCleanup() {
    RunCleanupsScope::ForceCleanup();
    copyInto(SavedLocals, CGF.LocalDeclMap);
    SavedLocals.clear();
  }

  /// \brief Exit scope - all the mapped variables are restored.
  ~OMPPrivateScope() {
    if (PerformCleanup)
      ForceCleanup();
  }

private:
  /// Copy all the entries in the source map into the destination.
  /// A source entry with an invalid address erases the corresponding
  /// destination entry; any other entry overwrites the existing destination
  /// entry or is inserted if none exists.
  static void copyInto(const DeclMapTy &src, DeclMapTy &dest) {
    for (auto &pair : src) {
      if (!pair.second.isValid()) {
        dest.erase(pair.first);
        continue;
      }

      auto it = dest.find(pair.first);
      if (it != dest.end()) {
        it->second = pair.second;
      } else {
        dest.insert(pair);
      }
    }
  }
};

/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added.
void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize);

/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added, then adds all lifetime-extended cleanups from
/// the given position to the stack.
void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
                      size_t OldLifetimeExtendedStackSize);

void ResolveBranchFixups(llvm::BasicBlock *Target);

/// The given basic block lies in the current EH scope, but may be a
/// target of a potentially scope-crossing jump; get a stable handle
/// to which we can perform this jump later.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target) {
  // Each call consumes one cleanup destination index.
  return JumpDest(Target,
                  EHStack.getInnermostNormalCleanup(),
                  NextCleanupDestIndex++);
}

/// The given basic block lies in the current EH scope, but may be a
/// target of a potentially scope-crossing jump; get a stable handle
/// to which we can perform this jump later.
JumpDest getJumpDestInCurrentScope(StringRef Name = StringRef()) { return getJumpDestInCurrentScope(createBasicBlock(Name)); } /// EmitBranchThroughCleanup - Emit a branch from the current insert /// block through the normal cleanup handling code (if any) and then /// on to \arg Dest. void EmitBranchThroughCleanup(JumpDest Dest); /// isObviouslyBranchWithoutCleanups - Return true if a branch to the /// specified destination obviously has no cleanups to run. 'false' is always /// a conservatively correct answer for this method. bool isObviouslyBranchWithoutCleanups(JumpDest Dest) const; /// popCatchScope - Pops the catch scope at the top of the EHScope /// stack, emitting any required code (other than the catch handlers /// themselves). void popCatchScope(); llvm::BasicBlock *getEHResumeBlock(bool isCleanup); llvm::BasicBlock *getEHDispatchBlock(EHScopeStack::stable_iterator scope); llvm::BasicBlock *getMSVCDispatchBlock(EHScopeStack::stable_iterator scope); /// An object to manage conditionally-evaluated expressions. class ConditionalEvaluation { llvm::BasicBlock *StartBB; public: ConditionalEvaluation(CodeGenFunction &CGF) : StartBB(CGF.Builder.GetInsertBlock()) {} void begin(CodeGenFunction &CGF) { assert(CGF.OutermostConditional != this); if (!CGF.OutermostConditional) CGF.OutermostConditional = this; } void end(CodeGenFunction &CGF) { assert(CGF.OutermostConditional != nullptr); if (CGF.OutermostConditional == this) CGF.OutermostConditional = nullptr; } /// Returns a block which will be executed prior to each /// evaluation of the conditional code. llvm::BasicBlock *getStartingBlock() const { return StartBB; } }; /// isInConditionalBranch - Return true if we're currently emitting /// one branch or the other of a conditional expression. 
bool isInConditionalBranch() const { return OutermostConditional != nullptr; } void setBeforeOutermostConditional(llvm::Value *value, Address addr) { assert(isInConditionalBranch()); llvm::BasicBlock *block = OutermostConditional->getStartingBlock(); auto store = new llvm::StoreInst(value, addr.getPointer(), &block->back()); store->setAlignment(addr.getAlignment().getQuantity()); } /// An RAII object to record that we're evaluating a statement /// expression. class StmtExprEvaluation { CodeGenFunction &CGF; /// We have to save the outermost conditional: cleanups in a /// statement expression aren't conditional just because the /// StmtExpr is. ConditionalEvaluation *SavedOutermostConditional; public: StmtExprEvaluation(CodeGenFunction &CGF) : CGF(CGF), SavedOutermostConditional(CGF.OutermostConditional) { CGF.OutermostConditional = nullptr; } ~StmtExprEvaluation() { CGF.OutermostConditional = SavedOutermostConditional; CGF.EnsureInsertPoint(); } }; /// An object which temporarily prevents a value from being /// destroyed by aggressive peephole optimizations that assume that /// all uses of a value have been realized in the IR. class PeepholeProtection { llvm::Instruction *Inst; friend class CodeGenFunction; public: PeepholeProtection() : Inst(nullptr) {} }; /// A non-RAII class containing all the information about a bound /// opaque value. OpaqueValueMapping, below, is a RAII wrapper for /// this which makes individual mappings very simple; using this /// class directly is useful when you have a variable number of /// opaque values or don't want the RAII functionality for some /// reason. 
class OpaqueValueMappingData { const OpaqueValueExpr *OpaqueValue; bool BoundLValue; CodeGenFunction::PeepholeProtection Protection; OpaqueValueMappingData(const OpaqueValueExpr *ov, bool boundLValue) : OpaqueValue(ov), BoundLValue(boundLValue) {} public: OpaqueValueMappingData() : OpaqueValue(nullptr) {} static bool shouldBindAsLValue(const Expr *expr) { // gl-values should be bound as l-values for obvious reasons. // Records should be bound as l-values because IR generation // always keeps them in memory. Expressions of function type // act exactly like l-values but are formally required to be // r-values in C. return expr->isGLValue() || expr->getType()->isFunctionType() || hasAggregateEvaluationKind(expr->getType()); } static OpaqueValueMappingData bind(CodeGenFunction &CGF, const OpaqueValueExpr *ov, const Expr *e) { if (shouldBindAsLValue(ov)) return bind(CGF, ov, CGF.EmitLValue(e)); return bind(CGF, ov, CGF.EmitAnyExpr(e)); } static OpaqueValueMappingData bind(CodeGenFunction &CGF, const OpaqueValueExpr *ov, const LValue &lv) { assert(shouldBindAsLValue(ov)); CGF.OpaqueLValues.insert(std::make_pair(ov, lv)); return OpaqueValueMappingData(ov, true); } static OpaqueValueMappingData bind(CodeGenFunction &CGF, const OpaqueValueExpr *ov, const RValue &rv) { assert(!shouldBindAsLValue(ov)); CGF.OpaqueRValues.insert(std::make_pair(ov, rv)); OpaqueValueMappingData data(ov, false); // Work around an extremely aggressive peephole optimization in // EmitScalarConversion which assumes that all other uses of a // value are extant. 
data.Protection = CGF.protectFromPeepholes(rv); return data; } bool isValid() const { return OpaqueValue != nullptr; } void clear() { OpaqueValue = nullptr; } void unbind(CodeGenFunction &CGF) { assert(OpaqueValue && "no data to unbind!"); if (BoundLValue) { CGF.OpaqueLValues.erase(OpaqueValue); } else { CGF.OpaqueRValues.erase(OpaqueValue); CGF.unprotectFromPeepholes(Protection); } } }; /// An RAII object to set (and then clear) a mapping for an OpaqueValueExpr. class OpaqueValueMapping { CodeGenFunction &CGF; OpaqueValueMappingData Data; public: static bool shouldBindAsLValue(const Expr *expr) { return OpaqueValueMappingData::shouldBindAsLValue(expr); } /// Build the opaque value mapping for the given conditional /// operator if it's the GNU ?: extension. This is a common /// enough pattern that the convenience operator is really /// helpful. /// OpaqueValueMapping(CodeGenFunction &CGF, const AbstractConditionalOperator *op) : CGF(CGF) { if (isa(op)) // Leave Data empty. return; const BinaryConditionalOperator *e = cast(op); Data = OpaqueValueMappingData::bind(CGF, e->getOpaqueValue(), e->getCommon()); } OpaqueValueMapping(CodeGenFunction &CGF, const OpaqueValueExpr *opaqueValue, LValue lvalue) : CGF(CGF), Data(OpaqueValueMappingData::bind(CGF, opaqueValue, lvalue)) { } OpaqueValueMapping(CodeGenFunction &CGF, const OpaqueValueExpr *opaqueValue, RValue rvalue) : CGF(CGF), Data(OpaqueValueMappingData::bind(CGF, opaqueValue, rvalue)) { } void pop() { Data.unbind(CGF); Data.clear(); } ~OpaqueValueMapping() { if (Data.isValid()) Data.unbind(CGF); } }; private: CGDebugInfo *DebugInfo; bool DisableDebugInfo; /// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid /// calling llvm.stacksave for multiple VLAs in the same scope. bool DidCallStackSave; /// IndirectBranch - The first time an indirect goto is seen we create a block /// with an indirect branch. Every time we see the address of a label taken, /// we add the label to the indirect goto. 
Every subsequent indirect goto is /// codegen'd as a jump to the IndirectBranch's basic block. llvm::IndirectBrInst *IndirectBranch; /// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C /// decls. DeclMapTy LocalDeclMap; /// SizeArguments - If a ParmVarDecl had the pass_object_size attribute, this /// will contain a mapping from said ParmVarDecl to its implicit "object_size" /// parameter. llvm::SmallDenseMap SizeArguments; /// Track escaped local variables with auto storage. Used during SEH /// outlining to produce a call to llvm.localescape. llvm::DenseMap EscapedLocals; /// LabelMap - This keeps track of the LLVM basic block for each C label. llvm::DenseMap LabelMap; // BreakContinueStack - This keeps track of where break and continue // statements should jump to. struct BreakContinue { BreakContinue(JumpDest Break, JumpDest Continue) : BreakBlock(Break), ContinueBlock(Continue) {} JumpDest BreakBlock; JumpDest ContinueBlock; }; SmallVector BreakContinueStack; CodeGenPGO PGO; /// Calculate branch weights appropriate for PGO data llvm::MDNode *createProfileWeights(uint64_t TrueCount, uint64_t FalseCount); llvm::MDNode *createProfileWeights(ArrayRef Weights); llvm::MDNode *createProfileWeightsForLoop(const Stmt *Cond, uint64_t LoopCount); public: /// Increment the profiler's counter for the given statement. void incrementProfileCounter(const Stmt *S) { if (CGM.getCodeGenOpts().ProfileInstrGenerate) PGO.emitCounterIncrement(Builder, S); PGO.setCurrentStmt(S); } /// Get the profiler's count for the given statement. uint64_t getProfileCount(const Stmt *S) { Optional Count = PGO.getStmtCount(S); if (!Count.hasValue()) return 0; return *Count; } /// Set the profiler's current count. void setCurrentProfileCount(uint64_t Count) { PGO.setCurrentRegionCount(Count); } /// Get the profiler's current count. This is generally the count for the most /// recently incremented counter. 
uint64_t getCurrentProfileCount() { return PGO.getCurrentRegionCount(); } private: /// SwitchInsn - This is nearest current switch instruction. It is null if /// current context is not in a switch. llvm::SwitchInst *SwitchInsn; /// The branch weights of SwitchInsn when doing instrumentation based PGO. SmallVector *SwitchWeights; /// CaseRangeBlock - This block holds if condition check for last case /// statement range in current switch instruction. llvm::BasicBlock *CaseRangeBlock; /// OpaqueLValues - Keeps track of the current set of opaque value /// expressions. llvm::DenseMap OpaqueLValues; llvm::DenseMap OpaqueRValues; // VLASizeMap - This keeps track of the associated size for each VLA type. // We track this by the size expression rather than the type itself because // in certain situations, like a const qualifier applied to an VLA typedef, // multiple VLA types can share the same size expression. // FIXME: Maybe this could be a stack of maps that is pushed/popped as we // enter/leave scopes. llvm::DenseMap VLASizeMap; /// A block containing a single 'unreachable' instruction. Created /// lazily by getUnreachableBlock(). llvm::BasicBlock *UnreachableBlock; /// Counts of the number return expressions in the function. unsigned NumReturnExprs; /// Count the number of simple (constant) return expressions in the function. unsigned NumSimpleReturnExprs; /// The last regular (non-return) debug location (breakpoint) in the function. SourceLocation LastStopPoint; public: /// A scope within which we are constructing the fields of an object which /// might use a CXXDefaultInitExpr. This stashes away a 'this' value to use /// if we need to evaluate a CXXDefaultInitExpr within the evaluation. 
class FieldConstructionScope { public: FieldConstructionScope(CodeGenFunction &CGF, Address This) : CGF(CGF), OldCXXDefaultInitExprThis(CGF.CXXDefaultInitExprThis) { CGF.CXXDefaultInitExprThis = This; } ~FieldConstructionScope() { CGF.CXXDefaultInitExprThis = OldCXXDefaultInitExprThis; } private: CodeGenFunction &CGF; Address OldCXXDefaultInitExprThis; }; /// The scope of a CXXDefaultInitExpr. Within this scope, the value of 'this' /// is overridden to be the object under construction. class CXXDefaultInitExprScope { public: CXXDefaultInitExprScope(CodeGenFunction &CGF) : CGF(CGF), OldCXXThisValue(CGF.CXXThisValue), OldCXXThisAlignment(CGF.CXXThisAlignment) { CGF.CXXThisValue = CGF.CXXDefaultInitExprThis.getPointer(); CGF.CXXThisAlignment = CGF.CXXDefaultInitExprThis.getAlignment(); } ~CXXDefaultInitExprScope() { CGF.CXXThisValue = OldCXXThisValue; CGF.CXXThisAlignment = OldCXXThisAlignment; } public: CodeGenFunction &CGF; llvm::Value *OldCXXThisValue; CharUnits OldCXXThisAlignment; }; private: /// CXXThisDecl - When generating code for a C++ member function, /// this will hold the implicit 'this' declaration. ImplicitParamDecl *CXXABIThisDecl; llvm::Value *CXXABIThisValue; llvm::Value *CXXThisValue; CharUnits CXXABIThisAlignment; CharUnits CXXThisAlignment; /// The value of 'this' to use when evaluating CXXDefaultInitExprs within /// this expression. Address CXXDefaultInitExprThis = Address::invalid(); /// CXXStructorImplicitParamDecl - When generating code for a constructor or /// destructor, this will hold the implicit argument (e.g. VTT). ImplicitParamDecl *CXXStructorImplicitParamDecl; llvm::Value *CXXStructorImplicitParamValue; /// OutermostConditional - Points to the outermost active /// conditional control. This is used so that we know if a /// temporary should be destroyed conditionally. ConditionalEvaluation *OutermostConditional; /// The current lexical scope. 
LexicalScope *CurLexicalScope; /// The current source location that should be used for exception /// handling code. SourceLocation CurEHLocation; /// BlockByrefInfos - For each __block variable, contains /// information about the layout of the variable. llvm::DenseMap BlockByrefInfos; llvm::BasicBlock *TerminateLandingPad; llvm::BasicBlock *TerminateHandler; llvm::BasicBlock *TrapBB; /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. /// In the kernel metadata node, reference the kernel function and metadata /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): /// - A node for the vec_type_hint() qualifier contains string /// "vec_type_hint", an undefined value of the data type, /// and a Boolean that is true if the is integer and signed. /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string /// "work_group_size_hint", and three 32-bit integers X, Y and Z. /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string /// "reqd_work_group_size", and three 32-bit integers X, Y and Z. void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); CodeGenTypes &getTypes() const { return CGM.getTypes(); } ASTContext &getContext() const { return CGM.getContext(); } CGDebugInfo *getDebugInfo() { if (DisableDebugInfo) return nullptr; return DebugInfo; } void disableDebugInfo() { DisableDebugInfo = true; } void enableDebugInfo() { DisableDebugInfo = false; } bool shouldUseFusedARCCalls() { return CGM.getCodeGenOpts().OptimizationLevel == 0; } const LangOptions &getLangOpts() const { return CGM.getLangOpts(); } /// Returns a pointer to the function's exception object and selector slot, /// which is assigned in every landing pad. Address getExceptionSlot(); Address getEHSelectorSlot(); /// Returns the contents of the function's exception object and selector /// slots. 
llvm::Value *getExceptionFromSlot(); llvm::Value *getSelectorFromSlot(); Address getNormalCleanupDestSlot(); llvm::BasicBlock *getUnreachableBlock() { if (!UnreachableBlock) { UnreachableBlock = createBasicBlock("unreachable"); new llvm::UnreachableInst(getLLVMContext(), UnreachableBlock); } return UnreachableBlock; } llvm::BasicBlock *getInvokeDest() { if (!EHStack.requiresLandingPad()) return nullptr; return getInvokeDestImpl(); } bool currentFunctionUsesSEHTry() const { const auto *FD = dyn_cast_or_null(CurCodeDecl); return FD && FD->usesSEHTry(); } const TargetInfo &getTarget() const { return Target; } llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); } //===--------------------------------------------------------------------===// // Cleanups //===--------------------------------------------------------------------===// typedef void Destroyer(CodeGenFunction &CGF, Address addr, QualType ty); void pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin, Address arrayEndPointer, QualType elementType, CharUnits elementAlignment, Destroyer *destroyer); void pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, llvm::Value *arrayEnd, QualType elementType, CharUnits elementAlignment, Destroyer *destroyer); void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type); void pushEHDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type); void pushDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); void pushCallObjectDeleteCleanup(const FunctionDecl *OperatorDelete, llvm::Value *CompletePtr, QualType ElementType); void pushStackRestore(CleanupKind kind, Address SPMem); void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); llvm::Function *generateDestroyHelper(Address addr, QualType type, 
Destroyer *destroyer, bool useEHCleanupForArray, const VarDecl *VD); void emitArrayDestroy(llvm::Value *begin, llvm::Value *end, QualType elementType, CharUnits elementAlign, Destroyer *destroyer, bool checkZeroLength, bool useEHCleanup); Destroyer *getDestroyer(QualType::DestructionKind destructionKind); /// Determines whether an EH cleanup is required to destroy a type /// with the given destruction kind. bool needsEHCleanup(QualType::DestructionKind kind) { switch (kind) { case QualType::DK_none: return false; case QualType::DK_cxx_destructor: case QualType::DK_objc_weak_lifetime: return getLangOpts().Exceptions; case QualType::DK_objc_strong_lifetime: return getLangOpts().Exceptions && CGM.getCodeGenOpts().ObjCAutoRefCountExceptions; } llvm_unreachable("bad destruction kind"); } CleanupKind getCleanupKind(QualType::DestructionKind kind) { return (needsEHCleanup(kind) ? NormalAndEHCleanup : NormalCleanup); } //===--------------------------------------------------------------------===// // Objective-C //===--------------------------------------------------------------------===// void GenerateObjCMethod(const ObjCMethodDecl *OMD); void StartObjCMethod(const ObjCMethodDecl *MD, const ObjCContainerDecl *CD); /// GenerateObjCGetter - Synthesize an Objective-C property getter function. void GenerateObjCGetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID); void generateObjCGetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, const ObjCMethodDecl *GetterMothodDecl, llvm::Constant *AtomicHelperFn); void GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, ObjCMethodDecl *MD, bool ctor); /// GenerateObjCSetter - Synthesize an Objective-C property setter function /// for the given property. 
void GenerateObjCSetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID); void generateObjCSetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, llvm::Constant *AtomicHelperFn); //===--------------------------------------------------------------------===// // Block Bits //===--------------------------------------------------------------------===// llvm::Value *EmitBlockLiteral(const BlockExpr *); llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); static void destroyBlockInfos(CGBlockInfo *info); llvm::Function *GenerateBlockFunction(GlobalDecl GD, const CGBlockInfo &Info, const DeclMapTy &ldm, bool IsLambdaConversionToBlock); llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID); llvm::Constant *GenerateObjCAtomicGetterCopyHelperFunction( const ObjCPropertyImplDecl *PID); llvm::Value *EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty); void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags); class AutoVarEmission; void emitByrefStructureInit(const AutoVarEmission &emission); void enterByrefCleanup(const AutoVarEmission &emission); void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum, llvm::Value *ptr); Address LoadBlockStruct(); Address GetAddrOfBlockDecl(const VarDecl *var, bool ByRef); /// BuildBlockByrefAddress - Computes the location of the /// data in a variable which is declared as __block. 
Address emitBlockByrefAddress(Address baseAddr, const VarDecl *V, bool followForward = true); Address emitBlockByrefAddress(Address baseAddr, const BlockByrefInfo &info, bool followForward, const llvm::Twine &name); const BlockByrefInfo &getBlockByrefInfo(const VarDecl *var); void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); /// \brief Emit code for the start of a function. /// \param Loc The location to be associated with the function. /// \param StartLoc The location of the function body. void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc = SourceLocation(), SourceLocation StartLoc = SourceLocation()); void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body); void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, CallArgList &CallArgs); void EmitLambdaToBlockPointerBody(FunctionArgList &Args); void EmitLambdaBlockInvokeBody(); void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); void EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD); void EmitAsanPrologueOrEpilogue(bool Prologue); /// \brief Emit the unified return block, trying to avoid its emission when /// possible. /// \return The debug location of the user written return statement if the /// return block is is avoided. llvm::DebugLoc EmitReturnBlock(); /// FinishFunction - Complete IR generation of the current function. It is /// legal to call this function even if there is no current insertion point. 
void FinishFunction(SourceLocation EndLoc=SourceLocation()); void StartThunk(llvm::Function *Fn, GlobalDecl GD, const CGFunctionInfo &FnInfo); void EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk); void FinishThunk(); /// Emit a musttail call for a thunk with a potentially adjusted this pointer. void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr, llvm::Value *Callee); /// Generate a thunk for the given method. void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, GlobalDecl GD, const ThunkInfo &Thunk); llvm::Function *GenerateVarArgsThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, GlobalDecl GD, const ThunkInfo &Thunk); void EmitCtorPrologue(const CXXConstructorDecl *CD, CXXCtorType Type, FunctionArgList &Args); void EmitInitializerForField(FieldDecl *Field, LValue LHS, Expr *Init, ArrayRef ArrayIndexes); /// Struct with all informations about dynamic [sub]class needed to set vptr. struct VPtr { BaseSubobject Base; const CXXRecordDecl *NearestVBase; CharUnits OffsetFromNearestVBase; const CXXRecordDecl *VTableClass; }; /// Initialize the vtable pointer of the given subobject. void InitializeVTablePointer(const VPtr &vptr); typedef llvm::SmallVector VPtrsVector; typedef llvm::SmallPtrSet VisitedVirtualBasesSetTy; VPtrsVector getVTablePointers(const CXXRecordDecl *VTableClass); void getVTablePointers(BaseSubobject Base, const CXXRecordDecl *NearestVBase, CharUnits OffsetFromNearestVBase, bool BaseIsNonVirtualPrimaryBase, const CXXRecordDecl *VTableClass, VisitedVirtualBasesSetTy &VBases, VPtrsVector &vptrs); void InitializeVTablePointers(const CXXRecordDecl *ClassDecl); /// GetVTablePtr - Return the Value of the vtable pointer member pointed /// to by This. 
llvm::Value *GetVTablePtr(Address This, llvm::Type *VTableTy, const CXXRecordDecl *VTableClass); enum CFITypeCheckKind { CFITCK_VCall, CFITCK_NVCall, CFITCK_DerivedCast, CFITCK_UnrelatedCast, }; /// \brief Derived is the presumed address of an object of type T after a /// cast. If T is a polymorphic class type, emit a check that the virtual /// table for Derived belongs to a class derived from T. void EmitVTablePtrCheckForCast(QualType T, llvm::Value *Derived, bool MayBeNull, CFITypeCheckKind TCK, SourceLocation Loc); /// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable. /// If vptr CFI is enabled, emit a check that VTable is valid. void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); /// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for /// RD using llvm.bitset.test. void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given /// expr can be devirtualized. bool CanDevirtualizeMemberFunctionCall(const Expr *Base, const CXXMethodDecl *MD); /// EnterDtorCleanups - Enter the cleanups necessary to complete the /// given phase of destruction for a destructor. The end result /// should call destructors on members and base classes in reverse /// order of their construction. void EnterDtorCleanups(const CXXDestructorDecl *Dtor, CXXDtorType Type); /// ShouldInstrumentFunction - Return true if the current function should be /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. void EmitFunctionInstrumentation(const char *Fn); /// EmitMCountInstrumentation - Emit call to .mcount. 
void EmitMCountInstrumentation(); /// EmitFunctionProlog - Emit the target specific LLVM code to load the /// arguments for the given function. This is also responsible for naming the /// LLVM function arguments. void EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Function *Fn, const FunctionArgList &Args); /// EmitFunctionEpilog - Emit the target specific LLVM code to return the /// given temporary. void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc); /// EmitStartEHSpec - Emit the start of the exception spec. void EmitStartEHSpec(const Decl *D); /// EmitEndEHSpec - Emit the end of the exception spec. void EmitEndEHSpec(const Decl *D); /// getTerminateLandingPad - Return a landing pad that just calls terminate. llvm::BasicBlock *getTerminateLandingPad(); /// getTerminateHandler - Return a handler (not a landing pad, just /// a catch handler) that just calls terminate. This is used when /// a terminate scope encloses a try. llvm::BasicBlock *getTerminateHandler(); llvm::Type *ConvertTypeForMem(QualType T); llvm::Type *ConvertType(QualType T); llvm::Type *ConvertType(const TypeDecl *T) { return ConvertType(getContext().getTypeDeclType(T)); } /// LoadObjCSelf - Load the value of self. This function is only valid while /// generating code for an Objective-C method. llvm::Value *LoadObjCSelf(); /// TypeOfSelfObject - Return type of object that this self represents. QualType TypeOfSelfObject(); /// hasAggregateLLVMType - Return true if the specified AST type will map into /// an aggregate LLVM type or is void. static TypeEvaluationKind getEvaluationKind(QualType T); static bool hasScalarEvaluationKind(QualType T) { return getEvaluationKind(T) == TEK_Scalar; } static bool hasAggregateEvaluationKind(QualType T) { return getEvaluationKind(T) == TEK_Aggregate; } /// createBasicBlock - Create an LLVM basic block. 
llvm::BasicBlock *createBasicBlock(const Twine &name = "", llvm::Function *parent = nullptr, llvm::BasicBlock *before = nullptr) { #ifdef NDEBUG return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before); #else return llvm::BasicBlock::Create(getLLVMContext(), name, parent, before); #endif } /// getBasicBlockForLabel - Return the LLVM basicblock that the specified /// label maps to. JumpDest getJumpDestForLabel(const LabelDecl *S); /// SimplifyForwardingBlocks - If the given basic block is only a branch to /// another basic block, simplify it. This assumes that no other code could /// potentially reference the basic block. void SimplifyForwardingBlocks(llvm::BasicBlock *BB); /// EmitBlock - Emit the given block \arg BB and set it as the insert point, /// adding a fall-through branch from the current insert block if /// necessary. It is legal to call this function even if there is no current /// insertion point. /// /// IsFinished - If true, indicates that the caller has finished emitting /// branches to the given block and does not expect to emit code into it. This /// means the block can be ignored if it is unreachable. void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false); /// EmitBlockAfterUses - Emit the given block somewhere hopefully /// near its uses, and leave the insertion point in it. void EmitBlockAfterUses(llvm::BasicBlock *BB); /// EmitBranch - Emit a branch to the specified basic block from the current /// insert block, taking care to avoid creation of branches from dummy /// blocks. It is legal to call this function even if there is no current /// insertion point. /// /// This function clears the current insertion point. The caller should follow /// calls to this function with calls to Emit*Block prior to generation new /// code. void EmitBranch(llvm::BasicBlock *Block); /// HaveInsertPoint - True if an insertion point is defined. If not, this /// indicates that the current code being emitted is unreachable. 
bool HaveInsertPoint() const {
  return Builder.GetInsertBlock() != nullptr;
}

/// EnsureInsertPoint - Ensure that an insertion point is defined so that
/// emitted IR has a place to go. Note that by definition, if this function
/// creates a block then that block is unreachable; callers may do better to
/// detect when no insertion point is defined and simply skip IR generation.
void EnsureInsertPoint() {
  if (!HaveInsertPoint())
    EmitBlock(createBasicBlock());
}

/// ErrorUnsupported - Print out an error that codegen doesn't support the
/// specified stmt yet.
void ErrorUnsupported(const Stmt *S, const char *Type);

//===--------------------------------------------------------------------===//
//                                  Helpers
//===--------------------------------------------------------------------===//

/// MakeAddrLValue - Build an l-value of type \p T for \p Addr through
/// LValue::MakeAddr, attaching the TBAA info that CGM reports for \p T.
LValue MakeAddrLValue(Address Addr, QualType T,
                      AlignmentSource AlignSource = AlignmentSource::Type) {
  return LValue::MakeAddr(Addr, T, getContext(), AlignSource,
                          CGM.getTBAAInfo(T));
}

/// MakeAddrLValue - Same as the overload above, but the address is formed
/// from the raw pointer \p V and the explicit \p Alignment.
LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
                      AlignmentSource AlignSource = AlignmentSource::Type) {
  return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
                          AlignSource, CGM.getTBAAInfo(T));
}

LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
CharUnits getNaturalTypeAlignment(QualType T,
                                  AlignmentSource *Source = nullptr,
                                  bool forPointeeType = false);
CharUnits getNaturalPointeeTypeAlignment(QualType T,
                                         AlignmentSource *Source = nullptr);

Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy,
                            AlignmentSource *Source = nullptr);
LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);

/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block. The caller is responsible for setting an appropriate alignment on
/// the alloca.
llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
                                   const Twine &Name = "tmp");
Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
                         const Twine &Name = "tmp");

/// CreateDefaultAlignTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
///
/// IMPORTANT NOTE: This is *not* generally the right alignment for
/// any given AST type that happens to have been lowered to the
/// given IR type. This should only ever be used for function-local,
/// IR-driven manipulations like saving and restoring a value. Do
/// not hand this address off to arbitrary IRGen routines, and especially
/// do not pass it as an argument to a function that might expect a
/// properly ABI-aligned value.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty,
                                     const Twine &Name = "tmp");

/// InitTempAlloca - Provide an initial value for the given alloca which
/// will be observable at all locations in the function.
///
/// The address should be something that was returned from one of
/// the CreateTempAlloca or CreateMemTemp routines, and the
/// initializer must be valid in the entry block (i.e. it must
/// either be a constant or an argument value).
void InitTempAlloca(Address Alloca, llvm::Value *Value);

/// CreateIRTemp - Create a temporary IR object of the given type, with
/// appropriate alignment. This routine should only be used when a temporary
/// value needs to be stored into an alloca (for example, to avoid explicit
/// PHI construction), but the type is the IR type, not the type appropriate
/// for storing in memory.
///
/// That is, this is exactly equivalent to CreateMemTemp, but calling
/// ConvertType instead of ConvertTypeForMem.
Address CreateIRTemp(QualType T, const Twine &Name = "tmp");

/// CreateMemTemp - Create a temporary memory object of the given type, with
/// appropriate alignment.
Address CreateMemTemp(QualType T, const Twine &Name = "tmp");
Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp");

/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type. The returned slot wraps a CreateMemTemp allocation and is
/// flagged as not destructed, not needing GC barriers, and not aliased.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp") {
  return AggValueSlot::forAddr(CreateMemTemp(T, Name),
                               T.getQualifiers(),
                               AggValueSlot::IsNotDestructed,
                               AggValueSlot::DoesNotNeedGCBarriers,
                               AggValueSlot::IsNotAliased);
}

/// Emit a cast to void* in the appropriate address space.
llvm::Value *EmitCastToVoidPtr(llvm::Value *value);

/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
/// expression and compare the result against zero, returning an Int1Ty value.
llvm::Value *EvaluateExprAsBool(const Expr *E);

/// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
void EmitIgnoredExpr(const Expr *E);

/// EmitAnyExpr - Emit code to compute the specified expression which can have
/// any type. The result is returned as an RValue struct. If this is an
/// aggregate expression, the \p aggSlot argument indicates where the result
/// should be returned.
///
/// \param ignoreResult True if the resulting value isn't used.
RValue EmitAnyExpr(const Expr *E,
                   AggValueSlot aggSlot = AggValueSlot::ignored(),
                   bool ignoreResult = false);

/// EmitVAListRef - Emit a "reference" to a va_list; this is either the address
/// or the value of the expression, depending on how va_list is defined.
Address EmitVAListRef(const Expr *E);

/// Emit a "reference" to a __builtin_ms_va_list; this is
/// always the value of the expression, because a __builtin_ms_va_list is a
/// pointer to a char.
Address EmitMSVAListRef(const Expr *E);

/// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue EmitAnyExprToTemp(const Expr *E);

/// EmitAnyExprToMem - Emits the code necessary to evaluate an
/// arbitrary expression into the given memory location.
void EmitAnyExprToMem(const Expr *E, Address Location,
                      Qualifiers Quals, bool IsInitializer);

void EmitAnyExprToExn(const Expr *E, Address Addr);

/// EmitExprAsInit - Emits the code necessary to initialize a
/// location in memory with the given initializer.
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue,
                    bool capturedByInit);

/// hasVolatileMember - returns true if aggregate type has a volatile
/// member. Non-record types yield false.
bool hasVolatileMember(QualType T) {
  // NOTE(review): getAs() and cast() appear here without a template argument
  // list; it looks like the arguments were stripped when this text was
  // extracted. Confirm against the upstream header before relying on it.
  if (const RecordType *RT = T->getAs()) {
    const RecordDecl *RD = cast(RT->getDecl());
    return RD->hasVolatileMember();
  }
  return false;
}

/// EmitAggregateAssign - Emit an aggregate assignment.
///
/// The difference to EmitAggregateCopy is that tail padding is not copied.
/// This is required for correctness when assigning non-POD structures in C++.
void EmitAggregateAssign(Address DestPtr, Address SrcPtr,
                         QualType EltTy) {
  // Volatility is derived from the element type's members; the trailing
  // 'true' is EmitAggregateCopy's isAssignment flag.
  bool IsVolatile = hasVolatileMember(EltTy);
  EmitAggregateCopy(DestPtr, SrcPtr, EltTy, IsVolatile, true);
}

/// EmitAggregateCopyCtor - Emit an aggregate copy of \p SrcTy from \p SrcPtr
/// to \p DestPtr, non-volatile and with isAssignment = false. \p DestTy is
/// not used by this body.
void EmitAggregateCopyCtor(Address DestPtr, Address SrcPtr,
                           QualType DestTy, QualType SrcTy) {
  EmitAggregateCopy(DestPtr, SrcPtr, SrcTy, /*IsVolatile=*/false,
                    /*IsAssignment=*/false);
}

/// EmitAggregateCopy - Emit an aggregate copy.
///
/// \param isVolatile - True iff either the source or the destination is
/// volatile.
/// \param isAssignment - If false, allow padding to be copied. This often
/// yields more efficient code.
void EmitAggregateCopy(Address DestPtr, Address SrcPtr,
                       QualType EltTy, bool isVolatile=false,
                       bool isAssignment = false);

/// GetAddrOfLocalVar - Return the address of a local variable.
Address GetAddrOfLocalVar(const VarDecl *VD) {
  // The declaration must already be registered in LocalDeclMap; this is only
  // checked by the assert.
  auto it = LocalDeclMap.find(VD);
  assert(it != LocalDeclMap.end() &&
         "Invalid argument to GetAddrOfLocalVar(), no decl!");
  return it->second;
}

/// getOpaqueLValueMapping - Given an opaque value expression (which
/// must be mapped to an l-value), return its mapping.
const LValue &getOpaqueLValueMapping(const OpaqueValueExpr *e) {
  assert(OpaqueValueMapping::shouldBindAsLValue(e));

  // NOTE(review): llvm::DenseMap appears without a template argument list;
  // the arguments look like they were stripped when this text was extracted.
  llvm::DenseMap::iterator
    it = OpaqueLValues.find(e);
  assert(it != OpaqueLValues.end() && "no mapping for opaque value!");
  return it->second;
}

/// getOpaqueRValueMapping - Given an opaque value expression (which
/// must be mapped to an r-value), return its mapping.
const RValue &getOpaqueRValueMapping(const OpaqueValueExpr *e) {
  assert(!OpaqueValueMapping::shouldBindAsLValue(e));

  // NOTE(review): template argument list missing here as well (see
  // getOpaqueLValueMapping).
  llvm::DenseMap::iterator
    it = OpaqueRValues.find(e);
  assert(it != OpaqueRValues.end() && "no mapping for opaque value!");
  return it->second;
}

/// getAccessedFieldNo - Given an encoded value and a result number, return
/// the input field number being accessed.
static unsigned getAccessedFieldNo(unsigned Idx, const llvm::Constant *Elts);

llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L);
llvm::BasicBlock *GetIndirectGotoBlock();

/// EmitNullInitialization - Generate code to set a value of the given type to
/// null. If the type contains data member pointers, they will be initialized
/// to -1 in accordance with the Itanium C++ ABI.
void EmitNullInitialization(Address DestPtr, QualType Ty);

/// Emits a call to an LLVM variable-argument intrinsic, either
/// \c llvm.va_start or \c llvm.va_end.
/// \param ArgValue A reference to the \c va_list as emitted by either
/// \c EmitVAListRef or \c EmitMSVAListRef.
/// \param IsStart If \c true, emits a call to \c llvm.va_start; otherwise,
/// calls \c llvm.va_end.
llvm::Value *EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart);

/// Generate code to get an argument from the passed in pointer
/// and update it accordingly.
/// \param VE The \c VAArgExpr for which to generate code. /// \param VAListAddr Receives a reference to the \c va_list as emitted by /// either \c EmitVAListRef or \c EmitMSVAListRef. /// \returns A pointer to the argument. // FIXME: We should be able to get rid of this method and use the va_arg // instruction in LLVM instead once it works well enough. Address EmitVAArg(VAArgExpr *VE, Address &VAListAddr); /// emitArrayLength - Compute the length of an array, even if it's a /// VLA, and drill down to the base element type. llvm::Value *emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr); /// EmitVLASize - Capture all the sizes for the VLA expressions in /// the given variably-modified type and store them in the VLASizeMap. /// /// This function can be called with a null (unreachable) insert point. void EmitVariablyModifiedType(QualType Ty); /// getVLASize - Returns an LLVM value that corresponds to the size, /// in non-variably-sized elements, of a variable length array type, /// plus that largest non-variably-sized element type. Assumes that /// the type has already been emitted with EmitVariablyModifiedType. std::pair getVLASize(const VariableArrayType *vla); std::pair getVLASize(QualType vla); /// LoadCXXThis - Load the value of 'this'. This function is only valid while /// generating code for an C++ member function. llvm::Value *LoadCXXThis() { assert(CXXThisValue && "no 'this' value for this function"); return CXXThisValue; } Address LoadCXXThisAddress(); /// LoadCXXVTT - Load the VTT parameter to base constructors/destructors have /// virtual bases. // FIXME: Every place that calls LoadCXXVTT is something // that needs to be abstracted properly. llvm::Value *LoadCXXVTT() { assert(CXXStructorImplicitParamValue && "no VTT value for this function"); return CXXStructorImplicitParamValue; } /// GetAddressOfBaseOfCompleteClass - Convert the given pointer to a /// complete class to the given direct base. 
Address GetAddressOfDirectBaseInCompleteClass(Address Value,
                                              const CXXRecordDecl *Derived,
                                              const CXXRecordDecl *Base,
                                              bool BaseIsVirtual);

static bool ShouldNullCheckClassCastValue(const CastExpr *Cast);

/// GetAddressOfBaseClass - This function will add the necessary delta to the
/// load of 'this' and returns address of the base class.
Address GetAddressOfBaseClass(Address Value,
                              const CXXRecordDecl *Derived,
                              CastExpr::path_const_iterator PathBegin,
                              CastExpr::path_const_iterator PathEnd,
                              bool NullCheckValue, SourceLocation Loc);

Address GetAddressOfDerivedClass(Address Value,
                                 const CXXRecordDecl *Derived,
                                 CastExpr::path_const_iterator PathBegin,
                                 CastExpr::path_const_iterator PathEnd,
                                 bool NullCheckValue);

/// GetVTTParameter - Return the VTT parameter that should be passed to a
/// base constructor/destructor with virtual bases.
/// FIXME: VTTs are Itanium ABI-specific, so the definition should move
/// to ItaniumCXXABI.cpp together with all the references to VTT.
llvm::Value *GetVTTParameter(GlobalDecl GD, bool ForVirtualBase,
                             bool Delegating);

void EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
                                    CXXCtorType CtorType,
                                    const FunctionArgList &Args,
                                    SourceLocation Loc);
// It's important not to confuse this and the previous function. Delegating
// constructors are the C++0x feature. The constructor delegate optimization
// is used to reduce duplication in the base and complete constructors where
// they are substantially the same.
void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor,
                                      const FunctionArgList &Args);

void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                            bool ForVirtualBase, bool Delegating,
                            Address This, const CXXConstructExpr *E);

/// Emit assumption load for all bases. Must be called only on the
/// most-derived class and not while the object is under construction.
void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This);

/// Emit assumption that vptr load == global vtable.
void EmitVTableAssumptionLoad(const VPtr &vptr, Address This);

void EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
                                    Address This, Address Src,
                                    const CXXConstructExpr *E);

void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D,
                                const ConstantArrayType *ArrayTy,
                                Address ArrayPtr,
                                const CXXConstructExpr *E,
                                bool ZeroInitialization = false);

void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D,
                                llvm::Value *NumElements,
                                Address ArrayPtr,
                                const CXXConstructExpr *E,
                                bool ZeroInitialization = false);

static Destroyer destroyCXXObject;

void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type,
                           bool ForVirtualBase, bool Delegating,
                           Address This);

void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
                             llvm::Type *ElementTy, Address NewPtr,
                             llvm::Value *NumElements,
                             llvm::Value *AllocSizeWithoutCookie);

void EmitCXXTemporary(const CXXTemporary *Temporary, QualType TempType,
                      Address Ptr);

llvm::Value *EmitLifetimeStart(uint64_t Size, llvm::Value *Addr);
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr);

llvm::Value *EmitCXXNewExpr(const CXXNewExpr *E);
void EmitCXXDeleteExpr(const CXXDeleteExpr *E);

void EmitDeleteCall(const FunctionDecl *DeleteFD, llvm::Value *Ptr,
                    QualType DeleteTy);

RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
                                const Expr *Arg, bool IsDelete);

llvm::Value *EmitCXXTypeidExpr(const CXXTypeidExpr *E);
llvm::Value *EmitDynamicCast(Address V, const CXXDynamicCastExpr *DCE);
Address EmitCXXUuidofExpr(const CXXUuidofExpr *E);

/// \brief Situations in which we might emit a check for the suitability of a
/// pointer or glvalue.
enum TypeCheckKind {
  /// Checking the operand of a load. Must be suitably sized and aligned.
  TCK_Load,
  /// Checking the destination of a store. Must be suitably sized and aligned.
  TCK_Store,
  /// Checking the bound value in a reference binding. Must be suitably sized
  /// and aligned, but is not required to refer to an object (until the
  /// reference is used), per core issue 453.
  TCK_ReferenceBinding,
  /// Checking the object expression in a non-static data member access. Must
  /// be an object within its lifetime.
  TCK_MemberAccess,
  /// Checking the 'this' pointer for a call to a non-static member function.
  /// Must be an object within its lifetime.
  TCK_MemberCall,
  /// Checking the 'this' pointer for a constructor call.
  TCK_ConstructorCall,
  /// Checking the operand of a static_cast to a derived pointer type. Must be
  /// null or an object within its lifetime.
  TCK_DowncastPointer,
  /// Checking the operand of a static_cast to a derived reference type. Must
  /// be an object within its lifetime.
  TCK_DowncastReference,
  /// Checking the operand of a cast to a base object. Must be suitably sized
  /// and aligned.
  TCK_Upcast,
  /// Checking the operand of a cast to a virtual base object. Must be an
  /// object within its lifetime.
  TCK_UpcastToVirtualBase
};

/// \brief Whether any type-checking sanitizers are enabled. If \c false,
/// calls to EmitTypeCheck can be skipped.
bool sanitizePerformTypeCheck() const;

/// \brief Emit a check that \p V is the address of storage of the
/// appropriate size and alignment for an object of type \p Type.
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
                   QualType Type, CharUnits Alignment = CharUnits::Zero(),
                   bool SkipNullCheck = false);

/// \brief Emit a check that \p Base points into an array object, which
/// we can access at index \p Index. \p Accessed should be \c false if
/// this expression is used as an lvalue, for instance in "&Arr[Idx]".
void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed); llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); void EmitAlignmentAssumption(llvm::Value *PtrValue, unsigned Alignment, llvm::Value *OffsetValue = nullptr) { Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, OffsetValue); } //===--------------------------------------------------------------------===// // Declaration Emission //===--------------------------------------------------------------------===// /// EmitDecl - Emit a declaration. /// /// This function can be called with a null (unreachable) insert point. void EmitDecl(const Decl &D); /// EmitVarDecl - Emit a local variable declaration. /// /// This function can be called with a null (unreachable) insert point. void EmitVarDecl(const VarDecl &D); void EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit); void EmitScalarInit(llvm::Value *init, LValue lvalue); typedef void SpecialInitFn(CodeGenFunction &Init, const VarDecl &D, llvm::Value *Address); /// \brief Determine whether the given initializer is trivial in the sense /// that it requires no code to be generated. bool isTrivialInitializer(const Expr *Init); /// EmitAutoVarDecl - Emit an auto variable declaration. /// /// This function can be called with a null (unreachable) insert point. void EmitAutoVarDecl(const VarDecl &D); class AutoVarEmission { friend class CodeGenFunction; const VarDecl *Variable; /// The address of the alloca. Invalid if the variable was emitted /// as a global constant. Address Addr; llvm::Value *NRVOFlag; /// True if the variable is a __block variable. bool IsByRef; /// True if the variable is of aggregate type and has a constant /// initializer. 
bool IsConstantAggregate; /// Non-null if we should use lifetime annotations. llvm::Value *SizeForLifetimeMarkers; struct Invalid {}; AutoVarEmission(Invalid) : Variable(nullptr), Addr(Address::invalid()) {} AutoVarEmission(const VarDecl &variable) : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), IsByRef(false), IsConstantAggregate(false), SizeForLifetimeMarkers(nullptr) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } public: static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } bool useLifetimeMarkers() const { return SizeForLifetimeMarkers != nullptr; } llvm::Value *getSizeForLifetimeMarkers() const { assert(useLifetimeMarkers()); return SizeForLifetimeMarkers; } /// Returns the raw, allocated address, which is not necessarily /// the address of the object itself. Address getAllocatedAddress() const { return Addr; } /// Returns the address of the object within this declaration. /// Note that this does not chase the forwarding pointer for /// __block decls. 
Address getObjectAddress(CodeGenFunction &CGF) const { if (!IsByRef) return Addr; return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false); } }; AutoVarEmission EmitAutoVarAlloca(const VarDecl &var); void EmitAutoVarInit(const AutoVarEmission &emission); void EmitAutoVarCleanups(const AutoVarEmission &emission); void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); void EmitStaticVarDecl(const VarDecl &D, llvm::GlobalValue::LinkageTypes Linkage); class ParamValue { llvm::Value *Value; unsigned Alignment; ParamValue(llvm::Value *V, unsigned A) : Value(V), Alignment(A) {} public: static ParamValue forDirect(llvm::Value *value) { return ParamValue(value, 0); } static ParamValue forIndirect(Address addr) { assert(!addr.getAlignment().isZero()); return ParamValue(addr.getPointer(), addr.getAlignment().getQuantity()); } bool isIndirect() const { return Alignment != 0; } llvm::Value *getAnyValue() const { return Value; } llvm::Value *getDirectValue() const { assert(!isIndirect()); return Value; } Address getIndirectAddress() const { assert(isIndirect()); return Address(Value, CharUnits::fromQuantity(Alignment)); } }; /// EmitParmDecl - Emit a ParmVarDecl or an ImplicitParamDecl. void EmitParmDecl(const VarDecl &D, ParamValue Arg, unsigned ArgNo); /// protectFromPeepholes - Protect a value that we're intending to /// store to the side, but which will probably be used later, from /// aggressive peepholing optimizations that might delete it. /// /// Pass the result to unprotectFromPeepholes to declare that /// protection is no longer required. /// /// There's no particular reason why this shouldn't apply to /// l-values, it's just that no existing peepholes work on pointers. 
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);

//===--------------------------------------------------------------------===//
//                             Statement Emission
//===--------------------------------------------------------------------===//

/// EmitStopPoint - Emit a debug stoppoint if we are emitting debug info.
void EmitStopPoint(const Stmt *S);

/// EmitStmt - Emit the code for the statement \arg S. It is legal to call
/// this function even if there is no current insertion point.
///
/// This function may clear the current insertion point; callers should use
/// EnsureInsertPoint if they wish to subsequently generate code without first
/// calling EmitBlock, EmitBranch, or EmitStmt.
void EmitStmt(const Stmt *S);

/// EmitSimpleStmt - Try to emit a "simple" statement which does not
/// necessarily require an insertion point or debug information; typically
/// because the statement amounts to a jump or a container of other
/// statements.
///
/// \return True if the statement was handled.
bool EmitSimpleStmt(const Stmt *S);

Address EmitCompoundStmt(const CompoundStmt &S, bool GetLast = false,
                         AggValueSlot AVS = AggValueSlot::ignored());
Address EmitCompoundStmtWithoutScope(const CompoundStmt &S,
                                     bool GetLast = false,
                                     AggValueSlot AVS =
                                              AggValueSlot::ignored());

/// EmitLabel - Emit the block for the given label. It is legal to call this
/// function even if there is no current insertion point.
void EmitLabel(const LabelDecl *D); // helper for EmitLabelStmt.
void EmitLabelStmt(const LabelStmt &S);
void EmitAttributedStmt(const AttributedStmt &S);
void EmitGotoStmt(const GotoStmt &S);
void EmitIndirectGotoStmt(const IndirectGotoStmt &S);
void EmitIfStmt(const IfStmt &S);

// NOTE(review): ArrayRef appears without a template argument list in the loop
// emitters below (and in EmitCXXForRangeStmt further down); the arguments look
// like they were stripped when this text was extracted. Confirm against the
// upstream header.
void EmitWhileStmt(const WhileStmt &S,
                   ArrayRef Attrs = None);
void EmitDoStmt(const DoStmt &S, ArrayRef Attrs = None);
void EmitForStmt(const ForStmt &S,
                 ArrayRef Attrs = None);
void EmitReturnStmt(const ReturnStmt &S);
void EmitDeclStmt(const DeclStmt &S);
void EmitBreakStmt(const BreakStmt &S);
void EmitContinueStmt(const ContinueStmt &S);
void EmitSwitchStmt(const SwitchStmt &S);
void EmitDefaultStmt(const DefaultStmt &S);
void EmitCaseStmt(const CaseStmt &S);
void EmitCaseStmtRange(const CaseStmt &S);
void EmitAsmStmt(const AsmStmt &S);

void EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S);
void EmitObjCAtTryStmt(const ObjCAtTryStmt &S);
void EmitObjCAtThrowStmt(const ObjCAtThrowStmt &S);
void EmitObjCAtSynchronizedStmt(const ObjCAtSynchronizedStmt &S);
void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S);

void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);

void EmitCXXTryStmt(const CXXTryStmt &S);
void EmitSEHTryStmt(const SEHTryStmt &S);
void EmitSEHLeaveStmt(const SEHLeaveStmt &S);
void EnterSEHTryStmt(const SEHTryStmt &S);
void ExitSEHTryStmt(const SEHTryStmt &S);

void startOutlinedSEHHelper(CodeGenFunction &ParentCGF, bool IsFilter,
                            const Stmt *OutlinedStmt);

llvm::Function *GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
                                          const SEHExceptStmt &Except);

llvm::Function *GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
                                           const SEHFinallyStmt &Finally);

void EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF,
                              llvm::Value *ParentFP,
                              llvm::Value *EntryEBP);
llvm::Value *EmitSEHExceptionCode();
llvm::Value *EmitSEHExceptionInfo();
llvm::Value *EmitSEHAbnormalTermination();

/// Scan the outlined statement for captures from the parent function. For
/// each capture, mark the capture as escaped and emit a call to
/// llvm.localrecover. Insert the localrecover result into the LocalDeclMap.
void EmitCapturedLocals(CodeGenFunction &ParentCGF, const Stmt *OutlinedStmt,
                        bool IsFilter);

/// Recovers the address of a local in a parent function. ParentVar is the
/// address of the variable used in the immediate parent function. It can
/// either be an alloca or a call to llvm.localrecover if there are nested
/// outlined functions. ParentFP is the frame pointer of the outermost parent
/// frame.
Address recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF,
                                  Address ParentVar,
                                  llvm::Value *ParentFP);

void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                         ArrayRef Attrs = None);

LValue InitCapturedStruct(const CapturedStmt &S);
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
// NOTE(review): SmallVectorImpl appears without a template argument list;
// likely stripped during extraction.
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                SmallVectorImpl &CapturedVars);
// NOTE(review): the leading '+' on the next two lines are unified-diff
// addition markers from the enclosing patch, not C++ tokens.
+ void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
+ SourceLocation Loc);
/// \brief Perform element by element copying of arrays with type \a
/// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
/// generated by \a CopyGen.
///
/// \param DestAddr Address of the destination array.
/// \param SrcAddr Address of the source array.
/// \param OriginalType Type of destination and source arrays.
/// \param CopyGen Copying procedure that copies value of single array element
/// to another single array element.
// NOTE(review): llvm::function_ref appears without a template argument list;
// likely stripped during extraction.
void EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref &CopyGen);

/// \brief Emit proper copying of data from one variable to another.
///
/// \param OriginalType Original type of the copied variables.
/// \param DestAddr Destination address. /// \param SrcAddr Source address. /// \param DestVD Destination variable used in \a CopyExpr (for arrays, has /// type of the base array element). /// \param SrcVD Source variable used in \a CopyExpr (for arrays, has type of /// the base array element). /// \param Copy Actual copygin expression for copying data from \a SrcVD to \a /// DestVD. void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy); /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or /// \a X = \a E \a BO \a E. /// /// \param X Value to be updated. /// \param E Update value. /// \param BO Binary operation for update operation. /// \param IsXLHSInRHSPart true if \a X is LHS in RHS part of the update /// expression, false otherwise. /// \param AO Atomic ordering of the generated atomic instructions. /// \param CommonGen Code generator for complex expressions that cannot be /// expressed through atomicrmw instruction. /// \returns if simple 'atomicrmw' instruction was /// generated, otherwise. std::pair EmitOMPAtomicSimpleUpdateExpr( LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, llvm::AtomicOrdering AO, SourceLocation Loc, const llvm::function_ref &CommonGen); bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); void EmitOMPPrivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); /// \brief Emit code for copyin clause in \a D directive. The next code is /// generated at the start of outlined functions for directives: /// \code /// threadprivate_var1 = master_threadprivate_var1; /// operator=(threadprivate_var2, master_threadprivate_var2); /// ... /// __kmpc_barrier(&loc, global_tid); /// \endcode /// /// \param D OpenMP directive possibly with 'copyin' clause(s). /// \returns true if at least one copyin variable is found, false otherwise. 
bool EmitOMPCopyinClause(const OMPExecutableDirective &D); /// \brief Emit initial code for lastprivate variables. If some variable is /// not also firstprivate, then the default initialization is used. Otherwise /// initialization of this variable is performed by EmitOMPFirstprivateClause /// method. /// /// \param D Directive that may have 'lastprivate' clauses. /// \param PrivateScope Private scope for capturing lastprivate variables for /// proper codegen in internal captured statement. /// /// \returns true if there is at least one lastprivate variable, false /// otherwise. bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); /// \brief Emit final copying of lastprivate values to original variables at /// the end of the worksharing or simd directive. /// /// \param D Directive that has at least one 'lastprivate' clause. /// \param IsLastIterCond Boolean condition that must be set to 'i1 true' if /// it is the last iteration of the loop code in associated directive, or to /// 'i1 false' otherwise. If this item is nullptr, no final check is required. void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D, llvm::Value *IsLastIterCond = nullptr); /// \brief Emit initial code for reduction variables. Creates reduction copies /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'reduction' clause. /// \param PrivateScope Private scope for capturing reduction variables for /// proper codegen in internal captured statement. /// void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); /// \brief Emit final update of reduction values to original variables at /// the end of the directive. /// /// \param D Directive that has at least one 'reduction' clause. void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D); /// \brief Emit initial code for linear variables.
Creates private copies /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'linear' clause. void EmitOMPLinearClauseInit(const OMPLoopDirective &D); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); void EmitOMPSectionDirective(const OMPSectionDirective &S); void EmitOMPSingleDirective(const OMPSingleDirective &S); void EmitOMPMasterDirective(const OMPMasterDirective &S); void EmitOMPCriticalDirective(const OMPCriticalDirective &S); void EmitOMPParallelForDirective(const OMPParallelForDirective &S); void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S); void EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective &S); void EmitOMPTaskDirective(const OMPTaskDirective &S); void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); void EmitOMPFlushDirective(const OMPFlushDirective &S); void EmitOMPOrderedDirective(const OMPOrderedDirective &S); void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S); void EmitOMPCancelDirective(const OMPCancelDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); /// \brief Emit inner loop of the 
worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. /// \param RequiresCleanup true, if directive has some associated private /// variables. /// \param LoopCond Boolean condition for loop continuation. /// \param IncExpr Increment expression for loop control variable. /// \param BodyGen Generator for the inner body of the inner loop. /// \param PostIncGen Generator for post-increment code (required for ordered /// loop directives). void EmitOMPInnerLoop( const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref &BodyGen, const llvm::function_ref &PostIncGen); JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind); private: /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal(const OMPLoopDirective &D); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); /// \brief Emit code for sections directive. OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S); public: //===--------------------------------------------------------------------===// // LValue Expression Emission //===--------------------------------------------------------------------===// /// GetUndefRValue - Get an appropriate 'undef' rvalue for the given type. RValue GetUndefRValue(QualType Ty); /// EmitUnsupportedRValue - Emit a dummy r-value using the type of E /// and issue an ErrorUnsupported style diagnostic (using the /// provided Name).
RValue EmitUnsupportedRValue(const Expr *E, const char *Name); /// EmitUnsupportedLValue - Emit a dummy l-value using the type of E and issue /// an ErrorUnsupported style diagnostic (using the provided Name). LValue EmitUnsupportedLValue(const Expr *E, const char *Name); /// EmitLValue - Emit code to compute a designator that specifies the location /// of the expression. /// /// This can return one of two things: a simple address or a bitfield /// reference. In either case, the LLVM Value* in the LValue structure is /// guaranteed to be an LLVM pointer type. /// /// If this returns a bitfield reference, nothing about the pointee type of /// the LLVM value is known: For example, it may not be a pointer to an /// integer. /// /// If this returns a normal address, and if the lvalue's C type is fixed /// size, this method guarantees that the returned pointer type will point to /// an LLVM type of the same size of the lvalue's type. If the lvalue has a /// variable length type, this is not possible. /// LValue EmitLValue(const Expr *E); /// \brief Same as EmitLValue but additionally we generate checking code to /// guard against undefined behavior. This is only suitable when we know /// that the address will be used to access the object. 
LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK); RValue convertTempToRValue(Address addr, QualType type, SourceLocation Loc); void EmitAtomicInit(Expr *E, LValue lvalue); bool LValueIsSuitableForInlineAtomic(LValue Src); bool typeIsSuitableForInlineAtomic(QualType Ty, bool IsVolatile) const; RValue EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot = AggValueSlot::ignored()); RValue EmitAtomicLoad(LValue lvalue, SourceLocation loc, llvm::AtomicOrdering AO, bool IsVolatile = false, AggValueSlot slot = AggValueSlot::ignored()); void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit); void EmitAtomicStore(RValue rvalue, LValue lvalue, llvm::AtomicOrdering AO, bool IsVolatile, bool isInit); std::pair EmitAtomicCompareExchange( LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored()); void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO, const llvm::function_ref &UpdateOp, bool IsVolatile); /// EmitToMemory - Change a scalar value from its value /// representation to its in-memory representation. llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty); /// EmitFromMemory - Change a scalar value from its memory /// representation to its value representation. llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. 
llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource AlignSource = AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0, bool isNontemporal = false); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. The l-value must be a simple /// l-value. llvm::Value *EmitLoadOfScalar(LValue lvalue, SourceLocation Loc); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. /// NOTE(review): for a store the conversion presumably goes from the value /// representation to the memory representation (compare EmitToMemory); this /// wording looks copied from EmitLoadOfScalar - confirm. void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource AlignSource = AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0, bool isNontemporal = false); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. The l-value must be a simple /// l-value. The isInit flag indicates whether this is an initialization. /// If so, atomic qualifiers are ignored and the store is always non-atomic. void EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit=false); /// EmitLoadOfLValue - Given an expression that represents a value lvalue, /// this method emits the address of the lvalue, then loads the result as an /// rvalue, returning the rvalue. RValue EmitLoadOfLValue(LValue V, SourceLocation Loc); RValue EmitLoadOfExtVectorElementLValue(LValue V); RValue EmitLoadOfBitfieldLValue(LValue LV); RValue EmitLoadOfGlobalRegLValue(LValue LV); /// EmitStoreThroughLValue - Store the specified rvalue into the specified /// lvalue, where both are guaranteed to have the same type, and that type /// is 'Ty'.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit = false); void EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst); void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst); /// EmitStoreThroughBitfieldLValue - Store Src into Dst with same constraints /// as EmitStoreThroughLValue. /// /// \param Result [out] - If non-null, this will be set to a Value* for the /// bit-field contents after the store, appropriate for use as the result of /// an assignment to the bit-field. void EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, llvm::Value **Result=nullptr); /// Emit an l-value for an assignment (simple or compound) of complex type. LValue EmitComplexAssignmentLValue(const BinaryOperator *E); LValue EmitComplexCompoundAssignmentLValue(const CompoundAssignOperator *E); LValue EmitScalarCompoundAssignWithComplex(const CompoundAssignOperator *E, llvm::Value *&Result); // Note: only available for agg return types LValue EmitBinaryOperatorLValue(const BinaryOperator *E); LValue EmitCompoundAssignmentLValue(const CompoundAssignOperator *E); // Note: only available for agg return types LValue EmitCallExprLValue(const CallExpr *E); // Note: only available for agg return types LValue EmitVAArgExprLValue(const VAArgExpr *E); LValue EmitDeclRefLValue(const DeclRefExpr *E); LValue EmitStringLiteralLValue(const StringLiteral *E); LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E); LValue EmitPredefinedLValue(const PredefinedExpr *E); LValue EmitUnaryOpLValue(const UnaryOperator *E); LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed = false); LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound = true); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); LValue EmitCompoundLiteralLValue(const CompoundLiteralExpr *E); LValue EmitInitListLValue(const InitListExpr *E); LValue 
EmitConditionalOperatorLValue(const AbstractConditionalOperator *E); LValue EmitCastLValue(const CastExpr *E); LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e); Address EmitExtVectorElementLValue(LValue V); RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc); Address EmitArrayToPointerDecay(const Expr *Array, AlignmentSource *AlignSource = nullptr); class ConstantEmission { llvm::PointerIntPair ValueAndIsReference; ConstantEmission(llvm::Constant *C, bool isReference) : ValueAndIsReference(C, isReference) {} public: ConstantEmission() {} static ConstantEmission forReference(llvm::Constant *C) { return ConstantEmission(C, true); } static ConstantEmission forValue(llvm::Constant *C) { return ConstantEmission(C, false); } explicit operator bool() const { return ValueAndIsReference.getOpaqueValue() != nullptr; } bool isReference() const { return ValueAndIsReference.getInt(); } LValue getReferenceLValue(CodeGenFunction &CGF, Expr *refExpr) const { assert(isReference()); return CGF.MakeNaturalAlignAddrLValue(ValueAndIsReference.getPointer(), refExpr->getType()); } llvm::Constant *getValue() const { assert(!isReference()); return ValueAndIsReference.getPointer(); } }; ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr); RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e, AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); LValue EmitLValueForField(LValue Base, const FieldDecl* Field); LValue EmitLValueForLambdaField(const FieldDecl *Field); /// EmitLValueForFieldInitialization - Like EmitLValueForField, except that /// if the Field is a reference, this will return the address of the reference /// and not the address of the value stored in the reference. 
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl* Field); LValue EmitLValueForIvar(QualType ObjectTy, llvm::Value* Base, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers); LValue EmitCXXConstructLValue(const CXXConstructExpr *E); LValue EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E); LValue EmitLambdaLValue(const LambdaExpr *E); LValue EmitCXXTypeidLValue(const CXXTypeidExpr *E); LValue EmitCXXUuidofLValue(const CXXUuidofExpr *E); LValue EmitObjCMessageExprLValue(const ObjCMessageExpr *E); LValue EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E); LValue EmitStmtExprLValue(const StmtExpr *E); LValue EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E); LValue EmitObjCSelectorLValue(const ObjCSelectorExpr *E); void EmitDeclRefExprDbgValue(const DeclRefExpr *E, llvm::Constant *Init); //===--------------------------------------------------------------------===// // Scalar Expression Emission //===--------------------------------------------------------------------===// /// EmitCall - Generate a call of the given function, expecting the given /// result type, and using the given argument list which specifies both the /// LLVM arguments and the types they were derived from. 
RValue EmitCall(const CGFunctionInfo &FnInfo, llvm::Value *Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, CGCalleeInfo CalleeInfo = CGCalleeInfo(), llvm::Instruction **callOrInvoke = nullptr); RValue EmitCall(QualType FnType, llvm::Value *Callee, const CallExpr *E, ReturnValueSlot ReturnValue, CGCalleeInfo CalleeInfo = CGCalleeInfo(), llvm::Value *Chain = nullptr); RValue EmitCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue = ReturnValueSlot()); void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl); llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, const Twine &name = ""); llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, ArrayRef args, const Twine &name = ""); llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, const Twine &name = ""); llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, ArrayRef args, const Twine &name = ""); llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, ArrayRef Args, const Twine &Name = ""); llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef args, const Twine &name = ""); llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, const Twine &name = ""); void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef args); llvm::Value *BuildAppleKextVirtualCall(const CXXMethodDecl *MD, NestedNameSpecifier *Qual, llvm::Type *Ty); llvm::Value *BuildAppleKextVirtualDestructorCall(const CXXDestructorDecl *DD, CXXDtorType Type, const CXXRecordDecl *RD); RValue EmitCXXMemberOrOperatorCall(const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E); RValue EmitCXXStructorCall(const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E, StructorType Type); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue 
EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue, bool HasQualifier, NestedNameSpecifier *Qualifier, bool IsArrow, const Expr *Base); // Compute the object pointer. Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, AlignmentSource *AlignSource = nullptr); RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue); RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call /// is unhandled by the current target. 
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name = ""); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1); llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E); llvm::Value *EmitNeonCall(llvm::Function *F, SmallVectorImpl &O, const char *name, unsigned shift = 0, bool rightshift = false); llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx); llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift); llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name); llvm::Value *vectorWrapScalar16(llvm::Value *Op); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *BuildVector(ArrayRef Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E); llvm::Value *EmitObjCArrayLiteral(const ObjCArrayLiteral *E); llvm::Value *EmitObjCDictionaryLiteral(const ObjCDictionaryLiteral *E); 
llvm::Value *EmitObjCCollectionLiteral(const Expr *E, const ObjCMethodDecl *MethodWithObjects); llvm::Value *EmitObjCSelectorExpr(const ObjCSelectorExpr *E); RValue EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot Return = ReturnValueSlot()); /// Retrieves the default cleanup kind for an ARC cleanup. /// Except under -fobjc-arc-eh, ARC cleanups are normal-only. CleanupKind getARCCleanupKind() { return CGM.getCodeGenOpts().ObjCAutoRefCountExceptions ? NormalAndEHCleanup : NormalCleanup; } // ARC primitives. void EmitARCInitWeak(Address addr, llvm::Value *value); void EmitARCDestroyWeak(Address addr); llvm::Value *EmitARCLoadWeak(Address addr); llvm::Value *EmitARCLoadWeakRetained(Address addr); llvm::Value *EmitARCStoreWeak(Address addr, llvm::Value *value, bool ignored); void EmitARCCopyWeak(Address dst, Address src); void EmitARCMoveWeak(Address dst, Address src); llvm::Value *EmitARCRetainAutorelease(QualType type, llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseNonBlock(llvm::Value *value); llvm::Value *EmitARCStoreStrong(LValue lvalue, llvm::Value *value, bool resultIgnored); llvm::Value *EmitARCStoreStrongCall(Address addr, llvm::Value *value, bool resultIgnored); llvm::Value *EmitARCRetain(QualType type, llvm::Value *value); llvm::Value *EmitARCRetainNonBlock(llvm::Value *value); llvm::Value *EmitARCRetainBlock(llvm::Value *value, bool mandatory); void EmitARCDestroyStrong(Address addr, ARCPreciseLifetime_t precise); void EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise); llvm::Value *EmitARCAutorelease(llvm::Value *value); llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value); std::pair EmitARCStoreAutoreleasing(const BinaryOperator *e); std::pair EmitARCStoreStrong(const BinaryOperator *e, bool ignored); llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value 
*EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); llvm::Value *EmitARCExtendBlockObject(const Expr *expr); llvm::Value *EmitARCRetainScalarExpr(const Expr *expr); llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr); void EmitARCIntrinsicUse(ArrayRef values); static Destroyer destroyARCStrongImprecise; static Destroyer destroyARCStrongPrecise; static Destroyer destroyARCWeak; void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); llvm::Value *EmitObjCAutoreleasePoolPush(); llvm::Value *EmitObjCMRRAutoreleasePoolPush(); void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr); void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr); /// \brief Emits a reference binding to the passed in expression. RValue EmitReferenceBindingToExpr(const Expr *E); //===--------------------------------------------------------------------===// // Expression Emission //===--------------------------------------------------------------------===// // Expressions are broken into three classes: scalar, complex, aggregate. /// EmitScalarExpr - Emit the computation of the specified expression of LLVM /// scalar type, returning the result. llvm::Value *EmitScalarExpr(const Expr *E , bool IgnoreResultAssign = false); /// Emit a conversion from the specified type to the specified destination /// type, both of which are LLVM scalar types. llvm::Value *EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc); /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. llvm::Value *EmitComplexToScalarConversion(ComplexPairTy Src, QualType SrcTy, QualType DstTy, SourceLocation Loc); /// EmitAggExpr - Emit the computation of the specified expression /// of aggregate type. The result is computed into the given slot, /// which may be null to indicate that the value is not needed. 
void EmitAggExpr(const Expr *E, AggValueSlot AS); /// EmitAggExprToLValue - Emit the computation of the specified expression of /// aggregate type into a temporary LValue. LValue EmitAggExprToLValue(const Expr *E); /// EmitExtendGCLifetime - Given a pointer to an Objective-C object, /// make sure it survives garbage collection until this point. void EmitExtendGCLifetime(llvm::Value *object); /// EmitComplexExpr - Emit the computation of the specified expression of /// complex type, returning the result. ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal = false, bool IgnoreImag = false); /// EmitComplexExprIntoLValue - Emit the given expression of complex /// type and place its result into the specified l-value. void EmitComplexExprIntoLValue(const Expr *E, LValue dest, bool isInit); /// EmitStoreOfComplex - Store a complex number into the specified l-value. void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit); /// EmitLoadOfComplex - Load a complex number from the specified l-value. ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc); Address emitAddrOfRealComponent(Address complex, QualType complexType); Address emitAddrOfImagComponent(Address complex, QualType complexType); /// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the /// global variable that has already been created for it. If the initializer /// has a different type than GV does, this may free GV and return a different /// one. Otherwise it just returns GV. llvm::GlobalVariable * AddInitializerToStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *GV); /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++ /// variable with global storage. void EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::Constant *DeclPtr, bool PerformInit); llvm::Constant *createAtExitStub(const VarDecl &VD, llvm::Constant *Dtor, llvm::Constant *Addr); /// Call atexit() with a function that passes the given argument to /// the given function. 
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::Constant *fn, llvm::Constant *addr); /// Emit code in this function to perform a guarded variable /// initialization. Guarded initializations are used when it's not /// possible to prove that an initialization will be done exactly /// once, e.g. with a static local variable or a static data member /// of a class template. void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit); /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. void GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef CXXThreadLocals, Address Guard = Address::invalid()); /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. void GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, const std::vector > &DtorsAndObjects); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit); void EmitCXXConstructExpr(const CXXConstructExpr *E, AggValueSlot Dest); void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp); void enterFullExpression(const ExprWithCleanups *E) { if (E->getNumObjects() == 0) return; enterNonTrivialFullExpression(E); } void enterNonTrivialFullExpression(const ExprWithCleanups *E); void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest); RValue EmitAtomicExpr(AtomicExpr *E); //===--------------------------------------------------------------------===// // Annotations Emission //===--------------------------------------------------------------------===// /// Emit an annotation call (intrinsic or builtin). llvm::Value *EmitAnnotationCall(llvm::Value *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location); /// Emit local annotations for the local variable V, declared by D. 
void EmitVarAnnotations(const VarDecl *D, llvm::Value *V); /// Emit field annotations for the given field & value. Returns the /// annotation result. Address EmitFieldAnnotations(const FieldDecl *D, Address V); //===--------------------------------------------------------------------===// // Internal Helpers //===--------------------------------------------------------------------===// /// ContainsLabel - Return true if the statement contains a label in it. If /// this statement is not executed normally, the absence of a label means /// that we can just remove the code. static bool ContainsLabel(const Stmt *S, bool IgnoreCaseStmts = false); /// containsBreak - Return true if the statement contains a break out of it. /// If the statement (recursively) contains a switch or loop with a break /// inside of it, this is fine. static bool containsBreak(const Stmt *S); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant-folds, return true and set the boolean result in Result. bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant-folds, return true and store the folded value in Result. bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result); /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might /// try to simplify the codegen of the conditional based on the branch. /// TrueCount should be the number of times we expect the condition to /// evaluate to true based on PGO data.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount); /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); /// \brief Convert a value into a format suitable for passing to a runtime /// sanitizer handler. llvm::Value *EmitCheckValue(llvm::Value *V); /// \brief Emit a description of a source location in a format suitable for /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); /// \brief Create a basic block that will call a handler function in a /// sanitizer runtime with the provided arguments, and create a conditional /// branch to it. void EmitCheck(ArrayRef> Checked, StringRef CheckName, ArrayRef StaticArgs, ArrayRef DynamicArgs); /// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath /// if Cond is false. void EmitCfiSlowPathCheck(llvm::Value *Cond, llvm::ConstantInt *TypeId, llvm::Value *Ptr); /// \brief Create a basic block that will call the trap intrinsic, and emit a /// conditional branch to it, for the -ftrapv checks. void EmitTrapCheck(llvm::Value *Checked); /// \brief Emit a call to trap or debugtrap and attach function attribute /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); /// \brief Create a check for a function parameter that may potentially be /// declared as non-null. void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, const FunctionDecl *FD, unsigned ParmNum); /// EmitCallArg - Emit a single call argument. void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); /// EmitDelegateCallArg - We are performing a delegate call; that /// is, the current function is delegating to another one. Produce /// an r-value suitable for passing the given parameter.
void EmitDelegateCallArg(CallArgList &args, const VarDecl *param, SourceLocation loc); /// SetFPAccuracy - Set the minimum required accuracy of the given floating /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); void deferPlaceholderReplacement(llvm::Instruction *Old, llvm::Value *New); llvm::SmallVector, 4> DeferredReplacements; /// Set the address of a local variable. void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } /// ExpandTypeFromArgs - Reconstruct a structure of type \arg Ty /// from function arguments into \arg Dst. See ABIArgInfo::Expand. /// /// \param AI - The first function argument of the expansion. void ExpandTypeFromArgs(QualType Ty, LValue Dst, SmallVectorImpl::iterator &AI); /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, /// starting at index \arg IRCallArgPos. See ABIArgInfo::Expand. void ExpandTypeToArgs(QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy, SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos); llvm::Value* EmitAsmInput(const TargetInfo::ConstraintInfo &Info, const Expr *InputExpr, std::string &ConstraintStr); llvm::Value* EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, LValue InputValue, QualType InputType, std::string &ConstraintStr, SourceLocation Loc); /// \brief Attempts to statically evaluate the object size of E. If that /// fails, emits code to figure the size of E out for us. This is /// pass_object_size aware. llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType); /// \brief Emits the size of E, as required by __builtin_object_size. 
This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType); public: #ifndef NDEBUG // Determine whether the given argument is an Objective-C method // that may have type parameters in its signature. static bool isObjCMethodWithTypeParams(const ObjCMethodDecl *method) { const DeclContext *dc = method->getDeclContext(); if (const ObjCInterfaceDecl *classDecl= dyn_cast(dc)) { return classDecl->getTypeParamListAsWritten(); } if (const ObjCCategoryDecl *catDecl = dyn_cast(dc)) { return catDecl->getTypeParamList(); } return false; } template static bool isObjCMethodWithTypeParams(const T *) { return false; } #endif /// EmitCallArgs - Emit call arguments for a function. template void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo, llvm::iterator_range ArgRange, const FunctionDecl *CalleeDecl = nullptr, unsigned ParamsToSkip = 0) { SmallVector ArgTypes; CallExpr::const_arg_iterator Arg = ArgRange.begin(); assert((ParamsToSkip == 0 || CallArgTypeInfo) && "Can't skip parameters if type info is not provided"); if (CallArgTypeInfo) { #ifndef NDEBUG bool isGenericMethod = isObjCMethodWithTypeParams(CallArgTypeInfo); #endif // First, use the argument types that the type info knows about for (auto I = CallArgTypeInfo->param_type_begin() + ParamsToSkip, E = CallArgTypeInfo->param_type_end(); I != E; ++I, ++Arg) { assert(Arg != ArgRange.end() && "Running over edge of argument list!"); assert((isGenericMethod || ((*I)->isVariablyModifiedType() || (*I).getNonReferenceType()->isObjCRetainableType() || getContext() .getCanonicalType((*I).getNonReferenceType()) .getTypePtr() == getContext() .getCanonicalType((*Arg)->getType()) .getTypePtr())) && "type mismatch in call argument!"); ArgTypes.push_back(*I); } } // Either we've emitted all the call args, or we have a call to variadic // function. 
assert((Arg == ArgRange.end() || !CallArgTypeInfo || CallArgTypeInfo->isVariadic()) && "Extra arguments in non-variadic function!"); // If we still have any arguments, emit them using the type of the argument. for (auto *A : llvm::make_range(Arg, ArgRange.end())) ArgTypes.push_back(getVarArgType(A)); EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip); } void EmitCallArgs(CallArgList &Args, ArrayRef ArgTypes, llvm::iterator_range ArgRange, const FunctionDecl *CalleeDecl = nullptr, unsigned ParamsToSkip = 0); /// EmitPointerWithAlignment - Given an expression with a pointer /// type, emit the value and compute our best estimate of the /// alignment of the pointee. /// /// Note that this function will conservatively fall back on the type /// when it doesn't recognize the expression. /// /// \param Source - If non-null, this will be initialized with /// information about the source of the alignment. Note that this /// function will conservatively fall back on the type when it /// doesn't recognize the expression, which means that sometimes /// the returned alignment is only a worst-case estimate. One /// reasonable way to use this information is when there's a /// language guarantee that the pointer must be aligned to some /// stricter value, and we're simply trying to ensure that /// sufficiently obvious uses of under-aligned objects don't get /// miscompiled; for example, a placement new into the address of /// a local variable. In such a case, it's quite reasonable to /// just ignore the returned alignment when it isn't from an /// explicit source.
Address EmitPointerWithAlignment(const Expr *Addr, AlignmentSource *Source = nullptr); private: QualType getVarArgType(const Expr *Arg); const TargetCodeGenInfo &getTargetHooks() const { return CGM.getTargetCodeGenInfo(); } void EmitDeclMetadata(); BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType, const AutoVarEmission &emission); void AddObjCARCExceptionMetadata(llvm::Instruction *Inst); llvm::Value *GetValueForARMHint(unsigned BuiltinID); }; /// Helper class with most of the code for saving a value for a /// conditional expression cleanup. struct DominatingLLVMValue { typedef llvm::PointerIntPair saved_type; /// Answer whether the given value needs extra work to be saved. static bool needsSaving(llvm::Value *value) { // If it's not an instruction, we don't need to save. if (!isa(value)) return false; // If it's an instruction in the entry block, we don't need to save. llvm::BasicBlock *block = cast(value)->getParent(); return (block != &block->getParent()->getEntryBlock()); } /// Try to save the given value. static saved_type save(CodeGenFunction &CGF, llvm::Value *value) { if (!needsSaving(value)) return saved_type(value, false); // Otherwise, we need an alloca. auto align = CharUnits::fromQuantity( CGF.CGM.getDataLayout().getPrefTypeAlignment(value->getType())); Address alloca = CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save"); CGF.Builder.CreateStore(value, alloca); return saved_type(alloca.getPointer(), true); } static llvm::Value *restore(CodeGenFunction &CGF, saved_type value) { // If the value says it wasn't saved, trust that it's still dominating. if (!value.getInt()) return value.getPointer(); // Otherwise, it should be an alloca instruction, as set up in save(). auto alloca = cast(value.getPointer()); return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment()); } }; /// A partial specialization of DominatingValue for llvm::Values that /// might be llvm::Instructions. 
template struct DominatingPointer : DominatingLLVMValue { typedef T *type; static type restore(CodeGenFunction &CGF, saved_type value) { return static_cast(DominatingLLVMValue::restore(CGF, value)); } }; /// A specialization of DominatingValue for Address. template <> struct DominatingValue
{ typedef Address type; struct saved_type { DominatingLLVMValue::saved_type SavedValue; CharUnits Alignment; }; static bool needsSaving(type value) { return DominatingLLVMValue::needsSaving(value.getPointer()); } static saved_type save(CodeGenFunction &CGF, type value) { return { DominatingLLVMValue::save(CGF, value.getPointer()), value.getAlignment() }; } static type restore(CodeGenFunction &CGF, saved_type value) { return Address(DominatingLLVMValue::restore(CGF, value.SavedValue), value.Alignment); } }; /// A specialization of DominatingValue for RValue. template <> struct DominatingValue { typedef RValue type; class saved_type { enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral, AggregateAddress, ComplexAddress }; llvm::Value *Value; unsigned K : 3; unsigned Align : 29; saved_type(llvm::Value *v, Kind k, unsigned a = 0) : Value(v), K(k), Align(a) {} public: static bool needsSaving(RValue value); static saved_type save(CodeGenFunction &CGF, RValue value); RValue restore(CodeGenFunction &CGF); // implementations in CGCleanup.cpp }; static bool needsSaving(type value) { return saved_type::needsSaving(value); } static saved_type save(CodeGenFunction &CGF, type value) { return saved_type::save(CGF, value); } static type restore(CodeGenFunction &CGF, saved_type value) { return value.restore(CGF); } }; } // end namespace CodeGen } // end namespace clang #endif Index: vendor/clang/dist/lib/Sema/SemaTemplate.cpp =================================================================== --- vendor/clang/dist/lib/Sema/SemaTemplate.cpp (revision 294603) +++ vendor/clang/dist/lib/Sema/SemaTemplate.cpp (revision 294604) @@ -1,8472 +1,8471 @@ //===------- SemaTemplate.cpp - Semantic Analysis for C++ Templates -------===/ // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
//===----------------------------------------------------------------------===/ // // This file implements semantic analysis for C++ templates. //===----------------------------------------------------------------------===/ #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace clang; using namespace sema; // Exported for use by Parser. SourceRange clang::getTemplateParamsRange(TemplateParameterList const * const *Ps, unsigned N) { if (!N) return SourceRange(); return SourceRange(Ps[0]->getTemplateLoc(), Ps[N-1]->getRAngleLoc()); } /// \brief Determine whether the declaration found is acceptable as the name /// of a template and, if so, return that template declaration. Otherwise, /// returns NULL. static NamedDecl *isAcceptableTemplateName(ASTContext &Context, NamedDecl *Orig, bool AllowFunctionTemplates) { NamedDecl *D = Orig->getUnderlyingDecl(); if (isa(D)) { if (!AllowFunctionTemplates && isa(D)) return nullptr; return Orig; } if (CXXRecordDecl *Record = dyn_cast(D)) { // C++ [temp.local]p1: // Like normal (non-template) classes, class templates have an // injected-class-name (Clause 9). The injected-class-name // can be used with or without a template-argument-list. 
When // it is used without a template-argument-list, it is // equivalent to the injected-class-name followed by the // template-parameters of the class template enclosed in // <>. When it is used with a template-argument-list, it // refers to the specified class template specialization, // which could be the current specialization or another // specialization. if (Record->isInjectedClassName()) { Record = cast(Record->getDeclContext()); if (Record->getDescribedClassTemplate()) return Record->getDescribedClassTemplate(); if (ClassTemplateSpecializationDecl *Spec = dyn_cast(Record)) return Spec->getSpecializedTemplate(); } return nullptr; } return nullptr; } void Sema::FilterAcceptableTemplateNames(LookupResult &R, bool AllowFunctionTemplates) { // The set of class templates we've already seen. llvm::SmallPtrSet ClassTemplates; LookupResult::Filter filter = R.makeFilter(); while (filter.hasNext()) { NamedDecl *Orig = filter.next(); NamedDecl *Repl = isAcceptableTemplateName(Context, Orig, AllowFunctionTemplates); if (!Repl) filter.erase(); else if (Repl != Orig) { // C++ [temp.local]p3: // A lookup that finds an injected-class-name (10.2) can result in an // ambiguity in certain cases (for example, if it is found in more than // one base class). If all of the injected-class-names that are found // refer to specializations of the same class template, and if the name // is used as a template-name, the reference refers to the class // template itself and not a specialization thereof, and is not // ambiguous. if (ClassTemplateDecl *ClassTmpl = dyn_cast(Repl)) if (!ClassTemplates.insert(ClassTmpl).second) { filter.erase(); continue; } // FIXME: we promote access to public here as a workaround to // the fact that LookupResult doesn't let us remember that we // found this template through a particular injected class name, // which means we end up doing nasty things to the invariants. // Pretending that access is public is *much* safer. 
filter.replace(Repl, AS_public); } } filter.done(); } bool Sema::hasAnyAcceptableTemplateNames(LookupResult &R, bool AllowFunctionTemplates) { for (LookupResult::iterator I = R.begin(), IEnd = R.end(); I != IEnd; ++I) if (isAcceptableTemplateName(Context, *I, AllowFunctionTemplates)) return true; return false; } TemplateNameKind Sema::isTemplateName(Scope *S, CXXScopeSpec &SS, bool hasTemplateKeyword, UnqualifiedId &Name, ParsedType ObjectTypePtr, bool EnteringContext, TemplateTy &TemplateResult, bool &MemberOfUnknownSpecialization) { assert(getLangOpts().CPlusPlus && "No template names in C!"); DeclarationName TName; MemberOfUnknownSpecialization = false; switch (Name.getKind()) { case UnqualifiedId::IK_Identifier: TName = DeclarationName(Name.Identifier); break; case UnqualifiedId::IK_OperatorFunctionId: TName = Context.DeclarationNames.getCXXOperatorName( Name.OperatorFunctionId.Operator); break; case UnqualifiedId::IK_LiteralOperatorId: TName = Context.DeclarationNames.getCXXLiteralOperatorName(Name.Identifier); break; default: return TNK_Non_template; } QualType ObjectType = ObjectTypePtr.get(); LookupResult R(*this, TName, Name.getLocStart(), LookupOrdinaryName); LookupTemplateName(R, S, SS, ObjectType, EnteringContext, MemberOfUnknownSpecialization); if (R.empty()) return TNK_Non_template; if (R.isAmbiguous()) { // Suppress diagnostics; we'll redo this lookup later. R.suppressDiagnostics(); // FIXME: we might have ambiguous templates, in which case we // should at least parse them properly! return TNK_Non_template; } TemplateName Template; TemplateNameKind TemplateKind; unsigned ResultCount = R.end() - R.begin(); if (ResultCount > 1) { // We assume that we'll preserve the qualifier from a function // template name in other ways. Template = Context.getOverloadedTemplateName(R.begin(), R.end()); TemplateKind = TNK_Function_template; // We'll do this lookup again later. 
R.suppressDiagnostics(); } else { TemplateDecl *TD = cast((*R.begin())->getUnderlyingDecl()); if (SS.isSet() && !SS.isInvalid()) { NestedNameSpecifier *Qualifier = SS.getScopeRep(); Template = Context.getQualifiedTemplateName(Qualifier, hasTemplateKeyword, TD); } else { Template = TemplateName(TD); } if (isa(TD)) { TemplateKind = TNK_Function_template; // We'll do this lookup again later. R.suppressDiagnostics(); } else { assert(isa(TD) || isa(TD) || isa(TD) || isa(TD) || isa(TD)); TemplateKind = isa(TD) ? TNK_Var_template : TNK_Type_template; } } TemplateResult = TemplateTy::make(Template); return TemplateKind; } bool Sema::DiagnoseUnknownTemplateName(const IdentifierInfo &II, SourceLocation IILoc, Scope *S, const CXXScopeSpec *SS, TemplateTy &SuggestedTemplate, TemplateNameKind &SuggestedKind) { // We can't recover unless there's a dependent scope specifier preceding the // template name. // FIXME: Typo correction? if (!SS || !SS->isSet() || !isDependentScopeSpecifier(*SS) || computeDeclContext(*SS)) return false; // The code is missing a 'template' keyword prior to the dependent template // name. NestedNameSpecifier *Qualifier = (NestedNameSpecifier*)SS->getScopeRep(); Diag(IILoc, diag::err_template_kw_missing) << Qualifier << II.getName() << FixItHint::CreateInsertion(IILoc, "template "); SuggestedTemplate = TemplateTy::make(Context.getDependentTemplateName(Qualifier, &II)); SuggestedKind = TNK_Dependent_template_name; return true; } void Sema::LookupTemplateName(LookupResult &Found, Scope *S, CXXScopeSpec &SS, QualType ObjectType, bool EnteringContext, bool &MemberOfUnknownSpecialization) { // Determine where to perform name lookup MemberOfUnknownSpecialization = false; DeclContext *LookupCtx = nullptr; bool isDependent = false; if (!ObjectType.isNull()) { // This nested-name-specifier occurs in a member access expression, e.g., // x->B::f, and we are looking into the type of the object. 
assert(!SS.isSet() && "ObjectType and scope specifier cannot coexist"); LookupCtx = computeDeclContext(ObjectType); isDependent = ObjectType->isDependentType(); assert((isDependent || !ObjectType->isIncompleteType() || ObjectType->castAs()->isBeingDefined()) && "Caller should have completed object type"); // Template names cannot appear inside an Objective-C class or object type. if (ObjectType->isObjCObjectOrInterfaceType()) { Found.clear(); return; } } else if (SS.isSet()) { // This nested-name-specifier occurs after another nested-name-specifier, // so look into the context associated with the prior nested-name-specifier. LookupCtx = computeDeclContext(SS, EnteringContext); isDependent = isDependentScopeSpecifier(SS); // The declaration context must be complete. if (LookupCtx && RequireCompleteDeclContext(SS, LookupCtx)) return; } bool ObjectTypeSearchedInScope = false; bool AllowFunctionTemplatesInLookup = true; if (LookupCtx) { // Perform "qualified" name lookup into the declaration context we // computed, which is either the type of the base of a member access // expression or the declaration context associated with a prior // nested-name-specifier. LookupQualifiedName(Found, LookupCtx); if (!ObjectType.isNull() && Found.empty()) { // C++ [basic.lookup.classref]p1: // In a class member access expression (5.2.5), if the . or -> token is // immediately followed by an identifier followed by a <, the // identifier must be looked up to determine whether the < is the // beginning of a template argument list (14.2) or a less-than operator. // The identifier is first looked up in the class of the object // expression. If the identifier is not found, it is then looked up in // the context of the entire postfix-expression and shall name a class // or function template.
if (S) LookupName(Found, S); ObjectTypeSearchedInScope = true; AllowFunctionTemplatesInLookup = false; } } else if (isDependent && (!S || ObjectType.isNull())) { // We cannot look into a dependent object type or nested name // specifier. MemberOfUnknownSpecialization = true; return; } else { // Perform unqualified name lookup in the current scope. LookupName(Found, S); if (!ObjectType.isNull()) AllowFunctionTemplatesInLookup = false; } if (Found.empty() && !isDependent) { // If we did not find any names, attempt to correct any typos. DeclarationName Name = Found.getLookupName(); Found.clear(); // Simple filter callback that, for keywords, only accepts the C++ *_cast auto FilterCCC = llvm::make_unique(); FilterCCC->WantTypeSpecifiers = false; FilterCCC->WantExpressionKeywords = false; FilterCCC->WantRemainingKeywords = false; FilterCCC->WantCXXNamedCasts = true; if (TypoCorrection Corrected = CorrectTypo( Found.getLookupNameInfo(), Found.getLookupKind(), S, &SS, std::move(FilterCCC), CTK_ErrorRecovery, LookupCtx)) { Found.setLookupName(Corrected.getCorrection()); if (auto *ND = Corrected.getFoundDecl()) Found.addDecl(ND); FilterAcceptableTemplateNames(Found); if (!Found.empty()) { if (LookupCtx) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; diagnoseTypo(Corrected, PDiag(diag::err_no_member_template_suggest) << Name << LookupCtx << DroppedSpecifier << SS.getRange()); } else { diagnoseTypo(Corrected, PDiag(diag::err_no_template_suggest) << Name); } } } else { Found.setLookupName(Name); } } FilterAcceptableTemplateNames(Found, AllowFunctionTemplatesInLookup); if (Found.empty()) { if (isDependent) MemberOfUnknownSpecialization = true; return; } if (S && !ObjectType.isNull() && !ObjectTypeSearchedInScope && !getLangOpts().CPlusPlus11) { // C++03 [basic.lookup.classref]p1: // [...]
If the lookup in the class of the object expression finds a // template, the name is also looked up in the context of the entire // postfix-expression and [...] // // Note: C++11 does not perform this second lookup. LookupResult FoundOuter(*this, Found.getLookupName(), Found.getNameLoc(), LookupOrdinaryName); LookupName(FoundOuter, S); FilterAcceptableTemplateNames(FoundOuter, /*AllowFunctionTemplates=*/false); if (FoundOuter.empty()) { // - if the name is not found, the name found in the class of the // object expression is used, otherwise } else if (!FoundOuter.getAsSingle() || FoundOuter.isAmbiguous()) { // - if the name is found in the context of the entire // postfix-expression and does not name a class template, the name // found in the class of the object expression is used, otherwise FoundOuter.clear(); } else if (!Found.isSuppressingDiagnostics()) { // - if the name found is a class template, it must refer to the same // entity as the one found in the class of the object expression, // otherwise the program is ill-formed. if (!Found.isSingleResult() || Found.getFoundDecl()->getCanonicalDecl() != FoundOuter.getFoundDecl()->getCanonicalDecl()) { Diag(Found.getNameLoc(), diag::ext_nested_name_member_ref_lookup_ambiguous) << Found.getLookupName() << ObjectType; Diag(Found.getRepresentativeDecl()->getLocation(), diag::note_ambig_member_ref_object_type) << ObjectType; Diag(FoundOuter.getFoundDecl()->getLocation(), diag::note_ambig_member_ref_scope); // Recover by taking the template that we found in the object // expression's type. } } } } /// ActOnDependentIdExpression - Handle a dependent id-expression that /// was just parsed. This is only possible with an explicit scope /// specifier naming a dependent type. 
ExprResult Sema::ActOnDependentIdExpression(const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool isAddressOfOperand, const TemplateArgumentListInfo *TemplateArgs) { DeclContext *DC = getFunctionLevelDeclContext(); if (!isAddressOfOperand && isa(DC) && cast(DC)->isInstance()) { QualType ThisType = cast(DC)->getThisType(Context); // Since the 'this' expression is synthesized, we don't need to // perform the double-lookup check. NamedDecl *FirstQualifierInScope = nullptr; return CXXDependentScopeMemberExpr::Create( Context, /*This*/ nullptr, ThisType, /*IsArrow*/ true, /*Op*/ SourceLocation(), SS.getWithLocInContext(Context), TemplateKWLoc, FirstQualifierInScope, NameInfo, TemplateArgs); } return BuildDependentDeclRefExpr(SS, TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::BuildDependentDeclRefExpr(const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *TemplateArgs) { return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } /// DiagnoseTemplateParameterShadow - Produce a diagnostic complaining /// that the template parameter 'PrevDecl' is being shadowed by a new /// declaration at location Loc. Returns true to indicate that this is /// an error, and false otherwise. void Sema::DiagnoseTemplateParameterShadow(SourceLocation Loc, Decl *PrevDecl) { assert(PrevDecl->isTemplateParameter() && "Not a template parameter"); // Microsoft Visual C++ permits template parameters to be shadowed. if (getLangOpts().MicrosoftExt) return; // C++ [temp.local]p4: // A template-parameter shall not be redeclared within its // scope (including nested scopes). 
Diag(Loc, diag::err_template_param_shadow) << cast(PrevDecl)->getDeclName(); Diag(PrevDecl->getLocation(), diag::note_template_param_here); return; } /// AdjustDeclIfTemplate - If the given decl happens to be a template, reset /// the parameter D to reference the templated declaration and return a pointer /// to the template declaration. Otherwise, do nothing to D and return null. TemplateDecl *Sema::AdjustDeclIfTemplate(Decl *&D) { if (TemplateDecl *Temp = dyn_cast_or_null(D)) { D = Temp->getTemplatedDecl(); return Temp; } return nullptr; } ParsedTemplateArgument ParsedTemplateArgument::getTemplatePackExpansion( SourceLocation EllipsisLoc) const { assert(Kind == Template && "Only template template arguments can be pack expansions here"); assert(getAsTemplate().get().containsUnexpandedParameterPack() && "Template template argument pack expansion without packs"); ParsedTemplateArgument Result(*this); Result.EllipsisLoc = EllipsisLoc; return Result; } static TemplateArgumentLoc translateTemplateArgument(Sema &SemaRef, const ParsedTemplateArgument &Arg) { switch (Arg.getKind()) { case ParsedTemplateArgument::Type: { TypeSourceInfo *DI; QualType T = SemaRef.GetTypeFromParser(Arg.getAsType(), &DI); if (!DI) DI = SemaRef.Context.getTrivialTypeSourceInfo(T, Arg.getLocation()); return TemplateArgumentLoc(TemplateArgument(T), DI); } case ParsedTemplateArgument::NonType: { Expr *E = static_cast(Arg.getAsExpr()); return TemplateArgumentLoc(TemplateArgument(E), E); } case ParsedTemplateArgument::Template: { TemplateName Template = Arg.getAsTemplate().get(); TemplateArgument TArg; if (Arg.getEllipsisLoc().isValid()) TArg = TemplateArgument(Template, Optional()); else TArg = Template; return TemplateArgumentLoc(TArg, Arg.getScopeSpec().getWithLocInContext( SemaRef.Context), Arg.getLocation(), Arg.getEllipsisLoc()); } } llvm_unreachable("Unhandled parsed template argument"); } /// \brief Translates template arguments as provided by the parser /// into template arguments used by 
semantic analysis. void Sema::translateTemplateArguments(const ASTTemplateArgsPtr &TemplateArgsIn, TemplateArgumentListInfo &TemplateArgs) { for (unsigned I = 0, Last = TemplateArgsIn.size(); I != Last; ++I) TemplateArgs.addArgument(translateTemplateArgument(*this, TemplateArgsIn[I])); } static void maybeDiagnoseTemplateParameterShadow(Sema &SemaRef, Scope *S, SourceLocation Loc, IdentifierInfo *Name) { NamedDecl *PrevDecl = SemaRef.LookupSingleName( S, Name, Loc, Sema::LookupOrdinaryName, Sema::ForRedeclaration); if (PrevDecl && PrevDecl->isTemplateParameter()) SemaRef.DiagnoseTemplateParameterShadow(Loc, PrevDecl); } /// ActOnTypeParameter - Called when a C++ template type parameter /// (e.g., "typename T") has been parsed. Typename specifies whether /// the keyword "typename" was used to declare the type parameter /// (otherwise, "class" was used), and KeyLoc is the location of the /// "class" or "typename" keyword. ParamName is the name of the /// parameter (NULL indicates an unnamed template parameter) and /// ParamNameLoc is the location of the parameter name (if any). /// If the type parameter has a default argument, it will be added /// later via ActOnTypeParameterDefault. 
Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
                               SourceLocation EllipsisLoc,
                               SourceLocation KeyLoc,
                               IdentifierInfo *ParamName,
                               SourceLocation ParamNameLoc, unsigned Depth,
                               unsigned Position, SourceLocation EqualLoc,
                               ParsedType DefaultArg) {
  assert(S->isTemplateParamScope() &&
         "Template type parameter not in template parameter scope!");

  // An unnamed parameter has no name location; use the location of the
  // 'class'/'typename' keyword instead.
  SourceLocation Loc = ParamName ? ParamNameLoc : KeyLoc;

  bool IsParameterPack = EllipsisLoc.isValid();
  TemplateTypeParmDecl *Param = TemplateTypeParmDecl::Create(
      Context, Context.getTranslationUnitDecl(), KeyLoc, Loc, Depth, Position,
      ParamName, Typename, IsParameterPack);
  Param->setAccess(AS_public);

  if (ParamName) {
    maybeDiagnoseTemplateParameterShadow(*this, S, ParamNameLoc, ParamName);

    // Add the template parameter into the current scope.
    S->AddDecl(Param);
    IdResolver.AddDecl(Param);
  }

  // C++0x [temp.param]p9:
  //   A default template-argument may be specified for any kind of
  //   template-parameter that is not a template parameter pack.
  if (DefaultArg && IsParameterPack) {
    Diag(EqualLoc, diag::err_template_param_pack_default_arg);
    // Drop the default argument and continue without it.
    DefaultArg = ParsedType();
  }

  // Handle the default argument, if provided.
  if (DefaultArg) {
    TypeSourceInfo *DefaultTInfo;
    GetTypeFromParser(DefaultArg, &DefaultTInfo);

    assert(DefaultTInfo && "expected source information for type");

    // Check for unexpanded parameter packs.
    if (DiagnoseUnexpandedParameterPack(Loc, DefaultTInfo,
                                        UPPC_DefaultArgument))
      return Param;

    // Check the template argument itself.
    if (CheckTemplateArgument(Param, DefaultTInfo)) {
      Param->setInvalidDecl();
      return Param;
    }

    Param->setDefaultArgument(DefaultTInfo);
  }

  return Param;
}

/// \brief Check that the type of a non-type template parameter is
/// well-formed.
///
/// \returns the (possibly-promoted) parameter type if valid;
/// otherwise, produces a diagnostic and returns a NULL type.
QualType Sema::CheckNonTypeTemplateParameterType(QualType T, SourceLocation Loc) { // We don't allow variably-modified types as the type of non-type template // parameters. if (T->isVariablyModifiedType()) { Diag(Loc, diag::err_variably_modified_nontype_template_param) << T; return QualType(); } // C++ [temp.param]p4: // // A non-type template-parameter shall have one of the following // (optionally cv-qualified) types: // // -- integral or enumeration type, if (T->isIntegralOrEnumerationType() || // -- pointer to object or pointer to function, T->isPointerType() || // -- reference to object or reference to function, T->isReferenceType() || // -- pointer to member, T->isMemberPointerType() || // -- std::nullptr_t. T->isNullPtrType() || // If T is a dependent type, we can't do the check now, so we // assume that it is well-formed. T->isDependentType()) { // C++ [temp.param]p5: The top-level cv-qualifiers on the template-parameter // are ignored when determining its type. return T.getUnqualifiedType(); } // C++ [temp.param]p8: // // A non-type template-parameter of type "array of T" or // "function returning T" is adjusted to be of type "pointer to // T" or "pointer to function returning T", respectively. else if (T->isArrayType() || T->isFunctionType()) return Context.getDecayedType(T); Diag(Loc, diag::err_template_nontype_parm_bad_type) << T; return QualType(); } Decl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D, unsigned Depth, unsigned Position, SourceLocation EqualLoc, Expr *Default) { TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S); QualType T = TInfo->getType(); assert(S->isTemplateParamScope() && "Non-type template parameter not in template parameter scope!"); bool Invalid = false; T = CheckNonTypeTemplateParameterType(T, D.getIdentifierLoc()); if (T.isNull()) { T = Context.IntTy; // Recover with an 'int' type. 
Invalid = true; } IdentifierInfo *ParamName = D.getIdentifier(); bool IsParameterPack = D.hasEllipsis(); NonTypeTemplateParmDecl *Param = NonTypeTemplateParmDecl::Create(Context, Context.getTranslationUnitDecl(), D.getLocStart(), D.getIdentifierLoc(), Depth, Position, ParamName, T, IsParameterPack, TInfo); Param->setAccess(AS_public); if (Invalid) Param->setInvalidDecl(); if (ParamName) { maybeDiagnoseTemplateParameterShadow(*this, S, D.getIdentifierLoc(), ParamName); // Add the template parameter into the current scope. S->AddDecl(Param); IdResolver.AddDecl(Param); } // C++0x [temp.param]p9: // A default template-argument may be specified for any kind of // template-parameter that is not a template parameter pack. if (Default && IsParameterPack) { Diag(EqualLoc, diag::err_template_param_pack_default_arg); Default = nullptr; } // Check the well-formedness of the default template argument, if provided. if (Default) { // Check for unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(Default, UPPC_DefaultArgument)) return Param; TemplateArgument Converted; ExprResult DefaultRes = CheckTemplateArgument(Param, Param->getType(), Default, Converted); if (DefaultRes.isInvalid()) { Param->setInvalidDecl(); return Param; } Default = DefaultRes.get(); Param->setDefaultArgument(Default); } return Param; } /// ActOnTemplateTemplateParameter - Called when a C++ template template /// parameter (e.g. T in template