diff --git a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
index 97d5d7bb2180..c5dbb1b0a95c 100644
--- a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
@@ -1,15120 +1,15130 @@
//===--- ExprConstant.cpp - Expression Constant Evaluator -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Expr constant evaluator.
//
// Constant expression evaluation produces four main results:
//
//  * A success/failure flag indicating whether constant folding was successful.
//    This is the 'bool' return value used by most of the code in this file. A
//    'false' return value indicates that constant folding has failed, and any
//    appropriate diagnostic has already been produced.
//
//  * An evaluated result, valid only if constant folding has not failed.
//
//  * A flag indicating if evaluation encountered (unevaluated) side-effects.
//    These arise in cases such as (sideEffect(), 0) and (sideEffect() || 1),
//    where it is possible to determine the evaluated result regardless.
//
//  * A set of notes indicating why the evaluation was not a constant expression
//    (under the C++11 / C++1y rules only, at the moment), or, if folding failed
//    too, why the expression could not be folded.
//
// If we are checking for a potential constant expression, failure to constant
// fold a potential constant sub-expression will be indicated by a 'false'
// return value (the expression could not be folded) and no diagnostic (the
// expression is not necessarily non-constant).
//
//===----------------------------------------------------------------------===//

#include "Interp/Context.h"
#include "Interp/Frame.h"
#include "Interp/State.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/CurrentSourceLocExprScope.h"
#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OptionalDiagnostic.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/FixedPoint.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <functional>

#define DEBUG_TYPE "exprconstant"

using namespace clang;
using llvm::APInt;
using llvm::APSInt;
using llvm::APFloat;
using llvm::Optional;

namespace {
struct LValue;
class CallStackFrame;
class EvalInfo;

using SourceLocExprScopeGuard =
    CurrentSourceLocExprScope::SourceLocExprScopeGuard;

static QualType getType(APValue::LValueBase B) {
  if (!B) return QualType();
  if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) {
    // FIXME: It's unclear where we're supposed to take the type from, and
    // this actually matters for arrays of unknown bound. Eg:
    //
    // extern int arr[]; void f() { extern int arr[3]; };
    // constexpr int *p = &arr[1]; // valid?
    //
    // For now, we take the array bound from the most recent declaration.
    for (auto *Redecl = cast<ValueDecl>(D->getMostRecentDecl()); Redecl;
         Redecl = cast_or_null<ValueDecl>(Redecl->getPreviousDecl())) {
      QualType T = Redecl->getType();
      if (!T->isIncompleteArrayType())
        return T;
    }
    return D->getType();
  }

  if (B.is<TypeInfoLValue>())
    return B.getTypeInfoType();

  if (B.is<DynamicAllocLValue>())
    return B.getDynamicAllocType();

  const Expr *Base = B.get<const Expr*>();

  // For a materialized temporary, the type of the temporary we materialized
  // may not be the type of the expression.
  if (const MaterializeTemporaryExpr *MTE =
          dyn_cast<MaterializeTemporaryExpr>(Base)) {
    SmallVector<const Expr *, 2> CommaLHSs;
    SmallVector<SubobjectAdjustment, 2> Adjustments;
    const Expr *Temp = MTE->getSubExpr();
    const Expr *Inner =
        Temp->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments);
    // Keep any cv-qualifiers from the reference if we generated a temporary
    // for it directly. Otherwise use the type after adjustment.
    if (!Adjustments.empty())
      return Inner->getType();
  }

  return Base->getType();
}

/// Get an LValue path entry, which is known to not be an array index, as a
/// field declaration.
static const FieldDecl *getAsField(APValue::LValuePathEntry E) {
  return dyn_cast_or_null<FieldDecl>(E.getAsBaseOrMember().getPointer());
}
/// Get an LValue path entry, which is known to not be an array index, as a
/// base class declaration.
static const CXXRecordDecl *getAsBaseClass(APValue::LValuePathEntry E) {
  return dyn_cast_or_null<CXXRecordDecl>(E.getAsBaseOrMember().getPointer());
}
/// Determine whether this LValue path entry for a base class names a virtual
/// base class.
static bool isVirtualBaseClass(APValue::LValuePathEntry E) {
  return E.getAsBaseOrMember().getInt();
}

/// Given an expression, determine the type used to store the result of
/// evaluating that expression.
static QualType getStorageType(const ASTContext &Ctx, const Expr *E) {
  if (E->isRValue())
    return E->getType();
  return Ctx.getLValueReferenceType(E->getType());
}

/// Given a CallExpr, try to get the alloc_size attribute. May return null.
static const AllocSizeAttr *getAllocSizeAttr(const CallExpr *CE) {
  const FunctionDecl *Callee = CE->getDirectCallee();
  return Callee ? Callee->getAttr<AllocSizeAttr>() : nullptr;
}

/// Attempts to unwrap a CallExpr (with an alloc_size attribute) from an Expr.
/// This will look through a single cast.
///
/// Returns null if we couldn't unwrap a function with alloc_size.
static const CallExpr *tryUnwrapAllocSizeCall(const Expr *E) {
  if (!E->getType()->isPointerType())
    return nullptr;

  E = E->IgnoreParens();
  // If we're doing a variable assignment from e.g. malloc(N), there will
  // probably be a cast of some kind. In exotic cases, we might also see a
  // top-level ExprWithCleanups. Ignore them either way.
  if (const auto *FE = dyn_cast<FullExpr>(E))
    E = FE->getSubExpr()->IgnoreParens();

  if (const auto *Cast = dyn_cast<CastExpr>(E))
    E = Cast->getSubExpr()->IgnoreParens();

  if (const auto *CE = dyn_cast<CallExpr>(E))
    return getAllocSizeAttr(CE) ? CE : nullptr;
  return nullptr;
}

/// Determines whether or not the given Base contains a call to a function
/// with the alloc_size attribute.
static bool isBaseAnAllocSizeCall(APValue::LValueBase Base) {
  const auto *E = Base.dyn_cast<const Expr *>();
  return E && E->getType()->isPointerType() && tryUnwrapAllocSizeCall(E);
}

/// The bound to claim that an array of unknown bound has.
/// The value in MostDerivedArraySize is undefined in this case. So, set it
/// to an arbitrary value that's likely to loudly break things if it's used.
static const uint64_t AssumedSizeForUnsizedArray =
    std::numeric_limits<uint64_t>::max() / 2;

/// Determines if an LValue with the given LValueBase will have an unsized
/// array in its designator.
/// Find the path length and type of the most-derived subobject in the given /// path, and find the size of the containing array, if any. static unsigned findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base, ArrayRef Path, uint64_t &ArraySize, QualType &Type, bool &IsArray, bool &FirstEntryIsUnsizedArray) { // This only accepts LValueBases from APValues, and APValues don't support // arrays that lack size info. assert(!isBaseAnAllocSizeCall(Base) && "Unsized arrays shouldn't appear here"); unsigned MostDerivedLength = 0; Type = getType(Base); for (unsigned I = 0, N = Path.size(); I != N; ++I) { if (Type->isArrayType()) { const ArrayType *AT = Ctx.getAsArrayType(Type); Type = AT->getElementType(); MostDerivedLength = I + 1; IsArray = true; if (auto *CAT = dyn_cast(AT)) { ArraySize = CAT->getSize().getZExtValue(); } else { assert(I == 0 && "unexpected unsized array designator"); FirstEntryIsUnsizedArray = true; ArraySize = AssumedSizeForUnsizedArray; } } else if (Type->isAnyComplexType()) { const ComplexType *CT = Type->castAs(); Type = CT->getElementType(); ArraySize = 2; MostDerivedLength = I + 1; IsArray = true; } else if (const FieldDecl *FD = getAsField(Path[I])) { Type = FD->getType(); ArraySize = 0; MostDerivedLength = I + 1; IsArray = false; } else { // Path[I] describes a base class. ArraySize = 0; IsArray = false; } } return MostDerivedLength; } /// A path from a glvalue to a subobject of that glvalue. struct SubobjectDesignator { /// True if the subobject was named in a manner not supported by C++11. Such /// lvalues can still be folded, but they are not core constant expressions /// and we cannot perform lvalue-to-rvalue conversions on them. unsigned Invalid : 1; /// Is this a pointer one past the end of an object? unsigned IsOnePastTheEnd : 1; /// Indicator of whether the first entry is an unsized array. unsigned FirstEntryIsAnUnsizedArray : 1; /// Indicator of whether the most-derived object is an array element. unsigned MostDerivedIsArrayElement : 1; /// The length of the path to the most-derived object of which this is a /// subobject. unsigned MostDerivedPathLength : 28; /// The size of the array of which the most-derived object is an element. /// This will always be 0 if the most-derived object is not an array /// element. 0 is not an indicator of whether or not the most-derived object /// is an array, however, because 0-length arrays are allowed. /// /// If the current array is an unsized array, the value of this is /// undefined. uint64_t MostDerivedArraySize; /// The type of the most derived object referred to by this address. QualType MostDerivedType; typedef APValue::LValuePathEntry PathEntry; /// The entries on the path from the glvalue to the designated subobject. 
SmallVector Entries; SubobjectDesignator() : Invalid(true) {} explicit SubobjectDesignator(QualType T) : Invalid(false), IsOnePastTheEnd(false), FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0), MostDerivedType(T) {} SubobjectDesignator(ASTContext &Ctx, const APValue &V) : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false), FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0) { assert(V.isLValue() && "Non-LValue used to make an LValue designator?"); if (!Invalid) { IsOnePastTheEnd = V.isLValueOnePastTheEnd(); ArrayRef VEntries = V.getLValuePath(); Entries.insert(Entries.end(), VEntries.begin(), VEntries.end()); if (V.getLValueBase()) { bool IsArray = false; bool FirstIsUnsizedArray = false; MostDerivedPathLength = findMostDerivedSubobject( Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize, MostDerivedType, IsArray, FirstIsUnsizedArray); MostDerivedIsArrayElement = IsArray; FirstEntryIsAnUnsizedArray = FirstIsUnsizedArray; } } } void truncate(ASTContext &Ctx, APValue::LValueBase Base, unsigned NewLength) { if (Invalid) return; assert(Base && "cannot truncate path for null pointer"); assert(NewLength <= Entries.size() && "not a truncation"); if (NewLength == Entries.size()) return; Entries.resize(NewLength); bool IsArray = false; bool FirstIsUnsizedArray = false; MostDerivedPathLength = findMostDerivedSubobject( Ctx, Base, Entries, MostDerivedArraySize, MostDerivedType, IsArray, FirstIsUnsizedArray); MostDerivedIsArrayElement = IsArray; FirstEntryIsAnUnsizedArray = FirstIsUnsizedArray; } void setInvalid() { Invalid = true; Entries.clear(); } /// Determine whether the most derived subobject is an array without a /// known bound. bool isMostDerivedAnUnsizedArray() const { assert(!Invalid && "Calling this makes no sense on invalid designators"); return Entries.size() == 1 && FirstEntryIsAnUnsizedArray; } /// Determine what the most derived array's size is. Results in an assertion /// failure if the most derived array lacks a size. uint64_t getMostDerivedArraySize() const { assert(!isMostDerivedAnUnsizedArray() && "Unsized array has no size"); return MostDerivedArraySize; } /// Determine whether this is a one-past-the-end pointer. bool isOnePastTheEnd() const { assert(!Invalid); if (IsOnePastTheEnd) return true; if (!isMostDerivedAnUnsizedArray() && MostDerivedIsArrayElement && Entries[MostDerivedPathLength - 1].getAsArrayIndex() == MostDerivedArraySize) return true; return false; } /// Get the range of valid index adjustments in the form /// {maximum value that can be subtracted from this pointer, /// maximum value that can be added to this pointer} std::pair validIndexAdjustments() { if (Invalid || isMostDerivedAnUnsizedArray()) return {0, 0}; // [expr.add]p4: For the purposes of these operators, a pointer to a // nonarray object behaves the same as a pointer to the first element of // an array of length one with the type of the object as its element type. bool IsArray = MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement; uint64_t ArrayIndex = IsArray ? Entries.back().getAsArrayIndex() : (uint64_t)IsOnePastTheEnd; uint64_t ArraySize = IsArray ? getMostDerivedArraySize() : (uint64_t)1; return {ArrayIndex, ArraySize - ArrayIndex}; } /// Check that this refers to a valid subobject. 
bool isValidSubobject() const { if (Invalid) return false; return !isOnePastTheEnd(); } /// Check that this refers to a valid subobject, and if not, produce a /// relevant diagnostic and set the designator as invalid. bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK); /// Get the type of the designated object. QualType getType(ASTContext &Ctx) const { assert(!Invalid && "invalid designator has no subobject type"); return MostDerivedPathLength == Entries.size() ? MostDerivedType : Ctx.getRecordType(getAsBaseClass(Entries.back())); } /// Update this designator to refer to the first element within this array. void addArrayUnchecked(const ConstantArrayType *CAT) { Entries.push_back(PathEntry::ArrayIndex(0)); // This is a most-derived object. MostDerivedType = CAT->getElementType(); MostDerivedIsArrayElement = true; MostDerivedArraySize = CAT->getSize().getZExtValue(); MostDerivedPathLength = Entries.size(); } /// Update this designator to refer to the first element within the array of /// elements of type T. This is an array of unknown size. void addUnsizedArrayUnchecked(QualType ElemTy) { Entries.push_back(PathEntry::ArrayIndex(0)); MostDerivedType = ElemTy; MostDerivedIsArrayElement = true; // The value in MostDerivedArraySize is undefined in this case. So, set it // to an arbitrary value that's likely to loudly break things if it's // used. MostDerivedArraySize = AssumedSizeForUnsizedArray; MostDerivedPathLength = Entries.size(); } /// Update this designator to refer to the given base or member of this /// object. void addDeclUnchecked(const Decl *D, bool Virtual = false) { Entries.push_back(APValue::BaseOrMemberType(D, Virtual)); // If this isn't a base class, it's a new most-derived object. if (const FieldDecl *FD = dyn_cast(D)) { MostDerivedType = FD->getType(); MostDerivedIsArrayElement = false; MostDerivedArraySize = 0; MostDerivedPathLength = Entries.size(); } } /// Update this designator to refer to the given complex component. void addComplexUnchecked(QualType EltTy, bool Imag) { Entries.push_back(PathEntry::ArrayIndex(Imag)); // This is technically a most-derived object, though in practice this // is unlikely to matter. MostDerivedType = EltTy; MostDerivedIsArrayElement = true; MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E); void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, const APSInt &N); /// Add N to the address of this subobject. void adjustIndex(EvalInfo &Info, const Expr *E, APSInt N) { if (Invalid || !N) return; uint64_t TruncatedN = N.extOrTrunc(64).getZExtValue(); if (isMostDerivedAnUnsizedArray()) { diagnoseUnsizedArrayPointerArithmetic(Info, E); // Can't verify -- trust that the user is doing the right thing (or if // not, trust that the caller will catch the bad behavior). // FIXME: Should we reject if this overflows, at least? Entries.back() = PathEntry::ArrayIndex( Entries.back().getAsArrayIndex() + TruncatedN); return; } // [expr.add]p4: For the purposes of these operators, a pointer to a // nonarray object behaves the same as a pointer to the first element of // an array of length one with the type of the object as its element type. bool IsArray = MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement; uint64_t ArrayIndex = IsArray ? Entries.back().getAsArrayIndex() : (uint64_t)IsOnePastTheEnd; uint64_t ArraySize = IsArray ? 
getMostDerivedArraySize() : (uint64_t)1; if (N < -(int64_t)ArrayIndex || N > ArraySize - ArrayIndex) { // Calculate the actual index in a wide enough type, so we can include // it in the note. N = N.extend(std::max(N.getBitWidth() + 1, 65)); (llvm::APInt&)N += ArrayIndex; assert(N.ugt(ArraySize) && "bounds check failed for in-bounds index"); diagnosePointerArithmetic(Info, E, N); setInvalid(); return; } ArrayIndex += TruncatedN; assert(ArrayIndex <= ArraySize && "bounds check succeeded for out-of-bounds index"); if (IsArray) Entries.back() = PathEntry::ArrayIndex(ArrayIndex); else IsOnePastTheEnd = (ArrayIndex != 0); } }; /// A stack frame in the constexpr call stack. class CallStackFrame : public interp::Frame { public: EvalInfo &Info; /// Parent - The caller of this stack frame. CallStackFrame *Caller; /// Callee - The function which was called. const FunctionDecl *Callee; /// This - The binding for the this pointer in this call, if any. const LValue *This; /// Arguments - Parameter bindings for this function call, indexed by /// parameters' function scope indices. APValue *Arguments; /// Source location information about the default argument or default /// initializer expression we're evaluating, if any. CurrentSourceLocExprScope CurSourceLocExprScope; // Note that we intentionally use std::map here so that references to // values are stable. typedef std::pair MapKeyTy; typedef std::map MapTy; /// Temporaries - Temporary lvalues materialized within this stack frame. MapTy Temporaries; /// CallLoc - The location of the call expression for this call. SourceLocation CallLoc; /// Index - The call index of this call. unsigned Index; /// The stack of integers for tracking version numbers for temporaries. SmallVector TempVersionStack = {1}; unsigned CurTempVersion = TempVersionStack.back(); unsigned getTempVersion() const { return TempVersionStack.back(); } void pushTempVersion() { TempVersionStack.push_back(++CurTempVersion); } void popTempVersion() { TempVersionStack.pop_back(); } // FIXME: Adding this to every 'CallStackFrame' may have a nontrivial impact // on the overall stack usage of deeply-recursing constexpr evaluations. // (We should cache this map rather than recomputing it repeatedly.) // But let's try this and see how it goes; we can look into caching the map // as a later change. /// LambdaCaptureFields - Mapping from captured variables/this to /// corresponding data members in the closure class. llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField; CallStackFrame(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, APValue *Arguments); ~CallStackFrame(); // Return the temporary for Key whose version number is Version. APValue *getTemporary(const void *Key, unsigned Version) { MapKeyTy KV(Key, Version); auto LB = Temporaries.lower_bound(KV); if (LB != Temporaries.end() && LB->first == KV) return &LB->second; // Pair (Key,Version) wasn't found in the map. Check that no elements // in the map have 'Key' as their key. assert((LB == Temporaries.end() || LB->first.first != Key) && (LB == Temporaries.begin() || std::prev(LB)->first.first != Key) && "Element with key 'Key' found in map"); return nullptr; } // Return the current temporary for Key in the map. 
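  // ("Current" here means the entry with the highest version number for Key;
  // versions are bumped via pushTempVersion(), so temporaries created in
  // different iterations of a loop stay distinct.)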
APValue *getCurrentTemporary(const void *Key) { auto UB = Temporaries.upper_bound(MapKeyTy(Key, UINT_MAX)); if (UB != Temporaries.begin() && std::prev(UB)->first.first == Key) return &std::prev(UB)->second; return nullptr; } // Return the version number of the current temporary for Key. unsigned getCurrentTemporaryVersion(const void *Key) const { auto UB = Temporaries.upper_bound(MapKeyTy(Key, UINT_MAX)); if (UB != Temporaries.begin() && std::prev(UB)->first.first == Key) return std::prev(UB)->first.second; return 0; } /// Allocate storage for an object of type T in this stack frame. /// Populates LV with a handle to the created object. Key identifies /// the temporary within the stack frame, and must not be reused without /// bumping the temporary version number. template APValue &createTemporary(const KeyT *Key, QualType T, bool IsLifetimeExtended, LValue &LV); void describe(llvm::raw_ostream &OS) override; Frame *getCaller() const override { return Caller; } SourceLocation getCallLocation() const override { return CallLoc; } const FunctionDecl *getCallee() const override { return Callee; } bool isStdFunction() const { for (const DeclContext *DC = Callee; DC; DC = DC->getParent()) if (DC->isStdNamespace()) return true; return false; } }; /// Temporarily override 'this'. class ThisOverrideRAII { public: ThisOverrideRAII(CallStackFrame &Frame, const LValue *NewThis, bool Enable) : Frame(Frame), OldThis(Frame.This) { if (Enable) Frame.This = NewThis; } ~ThisOverrideRAII() { Frame.This = OldThis; } private: CallStackFrame &Frame; const LValue *OldThis; }; } static bool HandleDestruction(EvalInfo &Info, const Expr *E, const LValue &This, QualType ThisType); static bool HandleDestruction(EvalInfo &Info, SourceLocation Loc, APValue::LValueBase LVBase, APValue &Value, QualType T); namespace { /// A cleanup, and a flag indicating whether it is lifetime-extended. class Cleanup { llvm::PointerIntPair Value; APValue::LValueBase Base; QualType T; public: Cleanup(APValue *Val, APValue::LValueBase Base, QualType T, bool IsLifetimeExtended) : Value(Val, IsLifetimeExtended), Base(Base), T(T) {} bool isLifetimeExtended() const { return Value.getInt(); } bool endLifetime(EvalInfo &Info, bool RunDestructors) { if (RunDestructors) { SourceLocation Loc; if (const ValueDecl *VD = Base.dyn_cast()) Loc = VD->getLocation(); else if (const Expr *E = Base.dyn_cast()) Loc = E->getExprLoc(); return HandleDestruction(Info, Loc, Base, *Value.getPointer(), T); } *Value.getPointer() = APValue(); return true; } bool hasSideEffect() { return T.isDestructedType(); } }; /// A reference to an object whose construction we are currently evaluating. 
struct ObjectUnderConstruction { APValue::LValueBase Base; ArrayRef Path; friend bool operator==(const ObjectUnderConstruction &LHS, const ObjectUnderConstruction &RHS) { return LHS.Base == RHS.Base && LHS.Path == RHS.Path; } friend llvm::hash_code hash_value(const ObjectUnderConstruction &Obj) { return llvm::hash_combine(Obj.Base, Obj.Path); } }; enum class ConstructionPhase { None, Bases, AfterBases, AfterFields, Destroying, DestroyingBases }; } namespace llvm { template<> struct DenseMapInfo { using Base = DenseMapInfo; static ObjectUnderConstruction getEmptyKey() { return {Base::getEmptyKey(), {}}; } static ObjectUnderConstruction getTombstoneKey() { return {Base::getTombstoneKey(), {}}; } static unsigned getHashValue(const ObjectUnderConstruction &Object) { return hash_value(Object); } static bool isEqual(const ObjectUnderConstruction &LHS, const ObjectUnderConstruction &RHS) { return LHS == RHS; } }; } namespace { /// A dynamically-allocated heap object. struct DynAlloc { /// The value of this heap-allocated object. APValue Value; /// The allocating expression; used for diagnostics. Either a CXXNewExpr /// or a CallExpr (the latter is for direct calls to operator new inside /// std::allocator::allocate). const Expr *AllocExpr = nullptr; enum Kind { New, ArrayNew, StdAllocator }; /// Get the kind of the allocation. This must match between allocation /// and deallocation. Kind getKind() const { if (auto *NE = dyn_cast(AllocExpr)) return NE->isArray() ? ArrayNew : New; assert(isa(AllocExpr)); return StdAllocator; } }; struct DynAllocOrder { bool operator()(DynamicAllocLValue L, DynamicAllocLValue R) const { return L.getIndex() < R.getIndex(); } }; /// EvalInfo - This is a private struct used by the evaluator to capture /// information about a subexpression as it is folded. It retains information /// about the AST context, but also maintains information about the folded /// expression. /// /// If an expression could be evaluated, it is still possible it is not a C /// "integer constant expression" or constant expression. If not, this struct /// captures information about how and why not. /// /// One bit of information passed *into* the request for constant folding /// indicates whether the subexpression is "evaluated" or not according to C /// rules. For example, the RHS of (0 && foo()) is not evaluated. We can /// evaluate the expression regardless of what the RHS is, but C only allows /// certain things in certain situations. class EvalInfo : public interp::State { public: ASTContext &Ctx; /// EvalStatus - Contains information about the evaluation. Expr::EvalStatus &EvalStatus; /// CurrentCall - The top of the constexpr call stack. CallStackFrame *CurrentCall; /// CallStackDepth - The number of calls in the call stack right now. unsigned CallStackDepth; /// NextCallIndex - The next call index to assign. unsigned NextCallIndex; /// StepsLeft - The remaining number of evaluation steps we're permitted /// to perform. This is essentially a limit for the number of statements /// we will evaluate. unsigned StepsLeft; /// Enable the experimental new constant interpreter. If an expression is /// not supported by the interpreter, an error is triggered. bool EnableNewConstInterp; /// BottomFrame - The frame in which evaluation started. This must be /// initialized after CurrentCall and CallStackDepth. CallStackFrame BottomFrame; /// A stack of values whose lifetimes end at the end of some surrounding /// evaluation frame. 
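  /// Entries are pushed as temporaries are materialized in a frame and are
  /// run or discarded by ScopeRAII at the end of the enclosing block or
  /// full-expression; lifetime-extended entries survive until the whole
  /// evaluation finishes.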
llvm::SmallVector CleanupStack; /// EvaluatingDecl - This is the declaration whose initializer is being /// evaluated, if any. APValue::LValueBase EvaluatingDecl; enum class EvaluatingDeclKind { None, /// We're evaluating the construction of EvaluatingDecl. Ctor, /// We're evaluating the destruction of EvaluatingDecl. Dtor, }; EvaluatingDeclKind IsEvaluatingDecl = EvaluatingDeclKind::None; /// EvaluatingDeclValue - This is the value being constructed for the /// declaration whose initializer is being evaluated, if any. APValue *EvaluatingDeclValue; /// Set of objects that are currently being constructed. llvm::DenseMap ObjectsUnderConstruction; /// Current heap allocations, along with the location where each was /// allocated. We use std::map here because we need stable addresses /// for the stored APValues. std::map HeapAllocs; /// The number of heap allocations performed so far in this evaluation. unsigned NumHeapAllocs = 0; struct EvaluatingConstructorRAII { EvalInfo &EI; ObjectUnderConstruction Object; bool DidInsert; EvaluatingConstructorRAII(EvalInfo &EI, ObjectUnderConstruction Object, bool HasBases) : EI(EI), Object(Object) { DidInsert = EI.ObjectsUnderConstruction .insert({Object, HasBases ? ConstructionPhase::Bases : ConstructionPhase::AfterBases}) .second; } void finishedConstructingBases() { EI.ObjectsUnderConstruction[Object] = ConstructionPhase::AfterBases; } void finishedConstructingFields() { EI.ObjectsUnderConstruction[Object] = ConstructionPhase::AfterFields; } ~EvaluatingConstructorRAII() { if (DidInsert) EI.ObjectsUnderConstruction.erase(Object); } }; struct EvaluatingDestructorRAII { EvalInfo &EI; ObjectUnderConstruction Object; bool DidInsert; EvaluatingDestructorRAII(EvalInfo &EI, ObjectUnderConstruction Object) : EI(EI), Object(Object) { DidInsert = EI.ObjectsUnderConstruction .insert({Object, ConstructionPhase::Destroying}) .second; } void startedDestroyingBases() { EI.ObjectsUnderConstruction[Object] = ConstructionPhase::DestroyingBases; } ~EvaluatingDestructorRAII() { if (DidInsert) EI.ObjectsUnderConstruction.erase(Object); } }; ConstructionPhase isEvaluatingCtorDtor(APValue::LValueBase Base, ArrayRef Path) { return ObjectsUnderConstruction.lookup({Base, Path}); } /// If we're currently speculatively evaluating, the outermost call stack /// depth at which we can mutate state, otherwise 0. unsigned SpeculativeEvaluationDepth = 0; /// The current array initialization index, if we're performing array /// initialization. uint64_t ArrayInitIndex = -1; /// HasActiveDiagnostic - Was the previous diagnostic stored? If so, further /// notes attached to it will also be stored, otherwise they will not be. bool HasActiveDiagnostic; /// Have we emitted a diagnostic explaining why we couldn't constant /// fold (not just why it's not strictly a constant expression)? bool HasFoldFailureDiagnostic; /// Whether or not we're in a context where the front end requires a /// constant value. bool InConstantContext; /// Whether we're checking that an expression is a potential constant /// expression. If so, do not fail on constructs that could become constant /// later on (such as a use of an undefined global). bool CheckingPotentialConstantExpression = false; /// Whether we're checking for an expression that has undefined behavior. /// If so, we will produce warnings if we encounter an operation that is /// always undefined. bool CheckingForUndefinedBehavior = false; enum EvaluationMode { /// Evaluate as a constant expression. 
Stop if we find that the expression /// is not a constant expression. EM_ConstantExpression, /// Evaluate as a constant expression. Stop if we find that the expression /// is not a constant expression. Some expressions can be retried in the /// optimizer if we don't constant fold them here, but in an unevaluated /// context we try to fold them immediately since the optimizer never /// gets a chance to look at it. EM_ConstantExpressionUnevaluated, /// Fold the expression to a constant. Stop if we hit a side-effect that /// we can't model. EM_ConstantFold, /// Evaluate in any way we know how. Don't worry about side-effects that /// can't be modeled. EM_IgnoreSideEffects, } EvalMode; /// Are we checking whether the expression is a potential constant /// expression? bool checkingPotentialConstantExpression() const override { return CheckingPotentialConstantExpression; } /// Are we checking an expression for overflow? // FIXME: We should check for any kind of undefined or suspicious behavior // in such constructs, not just overflow. bool checkingForUndefinedBehavior() const override { return CheckingForUndefinedBehavior; } EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), StepsLeft(C.getLangOpts().ConstexprStepLimit), EnableNewConstInterp(C.getLangOpts().EnableNewConstInterp), BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), HasFoldFailureDiagnostic(false), InConstantContext(false), EvalMode(Mode) {} ~EvalInfo() { discardCleanups(); } void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value, EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) { EvaluatingDecl = Base; IsEvaluatingDecl = EDK; EvaluatingDeclValue = &Value; } bool CheckCallLimit(SourceLocation Loc) { // Don't perform any constexpr calls (other than the call we're checking) // when checking a potential constant expression. if (checkingPotentialConstantExpression() && CallStackDepth > 1) return false; if (NextCallIndex == 0) { // NextCallIndex has wrapped around. FFDiag(Loc, diag::note_constexpr_call_limit_exceeded); return false; } if (CallStackDepth <= getLangOpts().ConstexprCallDepth) return true; FFDiag(Loc, diag::note_constexpr_depth_limit_exceeded) << getLangOpts().ConstexprCallDepth; return false; } std::pair getCallFrameAndDepth(unsigned CallIndex) { assert(CallIndex && "no call index in getCallFrameAndDepth"); // We will eventually hit BottomFrame, which has Index 1, so Frame can't // be null in this loop. unsigned Depth = CallStackDepth; CallStackFrame *Frame = CurrentCall; while (Frame->Index > CallIndex) { Frame = Frame->Caller; --Depth; } if (Frame->Index == CallIndex) return {Frame, Depth}; return {nullptr, 0}; } bool nextStep(const Stmt *S) { if (!StepsLeft) { FFDiag(S->getBeginLoc(), diag::note_constexpr_step_limit_exceeded); return false; } --StepsLeft; return true; } APValue *createHeapAlloc(const Expr *E, QualType T, LValue &LV); Optional lookupDynamicAlloc(DynamicAllocLValue DA) { Optional Result; auto It = HeapAllocs.find(DA); if (It != HeapAllocs.end()) Result = &It->second; return Result; } /// Information about a stack frame for std::allocator::[de]allocate. 
struct StdAllocatorCaller { unsigned FrameIndex; QualType ElemType; explicit operator bool() const { return FrameIndex != 0; }; }; StdAllocatorCaller getStdAllocatorCaller(StringRef FnName) const { for (const CallStackFrame *Call = CurrentCall; Call != &BottomFrame; Call = Call->Caller) { const auto *MD = dyn_cast_or_null(Call->Callee); if (!MD) continue; const IdentifierInfo *FnII = MD->getIdentifier(); if (!FnII || !FnII->isStr(FnName)) continue; const auto *CTSD = dyn_cast(MD->getParent()); if (!CTSD) continue; const IdentifierInfo *ClassII = CTSD->getIdentifier(); const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); if (CTSD->isInStdNamespace() && ClassII && ClassII->isStr("allocator") && TAL.size() >= 1 && TAL[0].getKind() == TemplateArgument::Type) return {Call->Index, TAL[0].getAsType()}; } return {}; } void performLifetimeExtension() { // Disable the cleanups for lifetime-extended temporaries. CleanupStack.erase( std::remove_if(CleanupStack.begin(), CleanupStack.end(), [](Cleanup &C) { return C.isLifetimeExtended(); }), CleanupStack.end()); } /// Throw away any remaining cleanups at the end of evaluation. If any /// cleanups would have had a side-effect, note that as an unmodeled /// side-effect and return false. Otherwise, return true. bool discardCleanups() { for (Cleanup &C : CleanupStack) { if (C.hasSideEffect() && !noteSideEffect()) { CleanupStack.clear(); return false; } } CleanupStack.clear(); return true; } private: interp::Frame *getCurrentFrame() override { return CurrentCall; } const interp::Frame *getBottomFrame() const override { return &BottomFrame; } bool hasActiveDiagnostic() override { return HasActiveDiagnostic; } void setActiveDiagnostic(bool Flag) override { HasActiveDiagnostic = Flag; } void setFoldFailureDiagnostic(bool Flag) override { HasFoldFailureDiagnostic = Flag; } Expr::EvalStatus &getEvalStatus() const override { return EvalStatus; } ASTContext &getCtx() const override { return Ctx; } // If we have a prior diagnostic, it will be noting that the expression // isn't a constant expression. This diagnostic is more important, // unless we require this evaluation to produce a constant expression. // // FIXME: We might want to show both diagnostics to the user in // EM_ConstantFold mode. bool hasPriorDiagnostic() override { if (!EvalStatus.Diag->empty()) { switch (EvalMode) { case EM_ConstantFold: case EM_IgnoreSideEffects: if (!HasFoldFailureDiagnostic) break; // We've already failed to fold something. Keep that diagnostic. LLVM_FALLTHROUGH; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: setActiveDiagnostic(false); return true; } } return false; } unsigned getCallStackDepth() override { return CallStackDepth; } public: /// Should we continue evaluation after encountering a side-effect that we /// couldn't model? bool keepEvaluatingAfterSideEffect() { switch (EvalMode) { case EM_IgnoreSideEffects: return true; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: // By default, assume any side effect might be valid in some other // evaluation of this expression from a different context. return checkingPotentialConstantExpression() || checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } /// Note that we have had a side-effect, and determine whether we should /// keep evaluating. bool noteSideEffect() { EvalStatus.HasSideEffects = true; return keepEvaluatingAfterSideEffect(); } /// Should we continue evaluation after encountering undefined behavior? 
bool keepEvaluatingAfterUndefinedBehavior() { switch (EvalMode) { case EM_IgnoreSideEffects: case EM_ConstantFold: return true; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: return checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } /// Note that we hit something that was technically undefined behavior, but /// that we can evaluate past it (such as signed overflow or floating-point /// division by zero.) bool noteUndefinedBehavior() override { EvalStatus.HasUndefinedBehavior = true; return keepEvaluatingAfterUndefinedBehavior(); } /// Should we continue evaluation as much as possible after encountering a /// construct which can't be reduced to a value? bool keepEvaluatingAfterFailure() const override { if (!StepsLeft) return false; switch (EvalMode) { case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: case EM_IgnoreSideEffects: return checkingPotentialConstantExpression() || checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } /// Notes that we failed to evaluate an expression that other expressions /// directly depend on, and determine if we should keep evaluating. This /// should only be called if we actually intend to keep evaluating. /// /// Call noteSideEffect() instead if we may be able to ignore the value that /// we failed to evaluate, e.g. if we failed to evaluate Foo() in: /// /// (Foo(), 1) // use noteSideEffect /// (Foo() || true) // use noteSideEffect /// Foo() + 1 // use noteFailure LLVM_NODISCARD bool noteFailure() { // Failure when evaluating some expression often means there is some // subexpression whose evaluation was skipped. Therefore, (because we // don't track whether we skipped an expression when unwinding after an // evaluation failure) every evaluation failure that bubbles up from a // subexpression implies that a side-effect has potentially happened. We // skip setting the HasSideEffects flag to true until we decide to // continue evaluating after that point, which happens here. bool KeepGoing = keepEvaluatingAfterFailure(); EvalStatus.HasSideEffects |= KeepGoing; return KeepGoing; } class ArrayInitLoopIndex { EvalInfo &Info; uint64_t OuterIndex; public: ArrayInitLoopIndex(EvalInfo &Info) : Info(Info), OuterIndex(Info.ArrayInitIndex) { Info.ArrayInitIndex = 0; } ~ArrayInitLoopIndex() { Info.ArrayInitIndex = OuterIndex; } operator uint64_t&() { return Info.ArrayInitIndex; } }; }; /// Object used to treat all foldable expressions as constant expressions. struct FoldConstant { EvalInfo &Info; bool Enabled; bool HadNoPriorDiags; EvalInfo::EvaluationMode OldMode; explicit FoldConstant(EvalInfo &Info, bool Enabled) : Info(Info), Enabled(Enabled), HadNoPriorDiags(Info.EvalStatus.Diag && Info.EvalStatus.Diag->empty() && !Info.EvalStatus.HasSideEffects), OldMode(Info.EvalMode) { if (Enabled) Info.EvalMode = EvalInfo::EM_ConstantFold; } void keepDiagnostics() { Enabled = false; } ~FoldConstant() { if (Enabled && HadNoPriorDiags && !Info.EvalStatus.Diag->empty() && !Info.EvalStatus.HasSideEffects) Info.EvalStatus.Diag->clear(); Info.EvalMode = OldMode; } }; /// RAII object used to set the current evaluation mode to ignore /// side-effects. 
struct IgnoreSideEffectsRAII { EvalInfo &Info; EvalInfo::EvaluationMode OldMode; explicit IgnoreSideEffectsRAII(EvalInfo &Info) : Info(Info), OldMode(Info.EvalMode) { Info.EvalMode = EvalInfo::EM_IgnoreSideEffects; } ~IgnoreSideEffectsRAII() { Info.EvalMode = OldMode; } }; /// RAII object used to optionally suppress diagnostics and side-effects from /// a speculative evaluation. class SpeculativeEvaluationRAII { EvalInfo *Info = nullptr; Expr::EvalStatus OldStatus; unsigned OldSpeculativeEvaluationDepth; void moveFromAndCancel(SpeculativeEvaluationRAII &&Other) { Info = Other.Info; OldStatus = Other.OldStatus; OldSpeculativeEvaluationDepth = Other.OldSpeculativeEvaluationDepth; Other.Info = nullptr; } void maybeRestoreState() { if (!Info) return; Info->EvalStatus = OldStatus; Info->SpeculativeEvaluationDepth = OldSpeculativeEvaluationDepth; } public: SpeculativeEvaluationRAII() = default; SpeculativeEvaluationRAII( EvalInfo &Info, SmallVectorImpl *NewDiag = nullptr) : Info(&Info), OldStatus(Info.EvalStatus), OldSpeculativeEvaluationDepth(Info.SpeculativeEvaluationDepth) { Info.EvalStatus.Diag = NewDiag; Info.SpeculativeEvaluationDepth = Info.CallStackDepth + 1; } SpeculativeEvaluationRAII(const SpeculativeEvaluationRAII &Other) = delete; SpeculativeEvaluationRAII(SpeculativeEvaluationRAII &&Other) { moveFromAndCancel(std::move(Other)); } SpeculativeEvaluationRAII &operator=(SpeculativeEvaluationRAII &&Other) { maybeRestoreState(); moveFromAndCancel(std::move(Other)); return *this; } ~SpeculativeEvaluationRAII() { maybeRestoreState(); } }; /// RAII object wrapping a full-expression or block scope, and handling /// the ending of the lifetime of temporaries created within it. template class ScopeRAII { EvalInfo &Info; unsigned OldStackSize; public: ScopeRAII(EvalInfo &Info) : Info(Info), OldStackSize(Info.CleanupStack.size()) { // Push a new temporary version. This is needed to distinguish between // temporaries created in different iterations of a loop. Info.CurrentCall->pushTempVersion(); } bool destroy(bool RunDestructors = true) { bool OK = cleanup(Info, RunDestructors, OldStackSize); OldStackSize = -1U; return OK; } ~ScopeRAII() { if (OldStackSize != -1U) destroy(false); // Body moved to a static method to encourage the compiler to inline away // instances of this class. Info.CurrentCall->popTempVersion(); } private: static bool cleanup(EvalInfo &Info, bool RunDestructors, unsigned OldStackSize) { assert(OldStackSize <= Info.CleanupStack.size() && "running cleanups out of order?"); // Run all cleanups for a block scope, and non-lifetime-extended cleanups // for a full-expression scope. bool Success = true; for (unsigned I = Info.CleanupStack.size(); I > OldStackSize; --I) { if (!(IsFullExpression && Info.CleanupStack[I - 1].isLifetimeExtended())) { if (!Info.CleanupStack[I - 1].endLifetime(Info, RunDestructors)) { Success = false; break; } } } // Compact lifetime-extended cleanups. 
    auto NewEnd = Info.CleanupStack.begin() + OldStackSize;
    if (IsFullExpression)
      NewEnd =
          std::remove_if(NewEnd, Info.CleanupStack.end(),
                         [](Cleanup &C) { return !C.isLifetimeExtended(); });
    Info.CleanupStack.erase(NewEnd, Info.CleanupStack.end());
    return Success;
  }
};
typedef ScopeRAII<false> BlockScopeRAII;
typedef ScopeRAII<true> FullExpressionRAII;
}

bool SubobjectDesignator::checkSubobject(EvalInfo &Info, const Expr *E,
                                         CheckSubobjectKind CSK) {
  if (Invalid)
    return false;
  if (isOnePastTheEnd()) {
    Info.CCEDiag(E, diag::note_constexpr_past_end_subobject)
      << CSK;
    setInvalid();
    return false;
  }
  // Note, we do not diagnose if isMostDerivedAnUnsizedArray(), because there
  // must actually be at least one array element; even a VLA cannot have a
  // bound of zero. And if our index is nonzero, we already had a CCEDiag.
  return true;
}

void SubobjectDesignator::diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info,
                                                                const Expr *E) {
  Info.CCEDiag(E, diag::note_constexpr_unsized_array_indexed);
  // Do not set the designator as invalid: we can represent this situation,
  // and correct handling of __builtin_object_size requires us to do so.
}

void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info,
                                                    const Expr *E,
                                                    const APSInt &N) {
  // If we're complaining, we must be able to statically determine the size of
  // the most derived array.
  if (MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement)
    Info.CCEDiag(E, diag::note_constexpr_array_index)
      << N << /*array*/ 0
      << static_cast<unsigned>(getMostDerivedArraySize());
  else
    Info.CCEDiag(E, diag::note_constexpr_array_index)
      << N << /*non-array*/ 1;
  setInvalid();
}

CallStackFrame::CallStackFrame(EvalInfo &Info, SourceLocation CallLoc,
                               const FunctionDecl *Callee, const LValue *This,
                               APValue *Arguments)
    : Info(Info), Caller(Info.CurrentCall), Callee(Callee), This(This),
      Arguments(Arguments), CallLoc(CallLoc), Index(Info.NextCallIndex++) {
  Info.CurrentCall = this;
  ++Info.CallStackDepth;
}

CallStackFrame::~CallStackFrame() {
  assert(Info.CurrentCall == this && "calls retired out of order");
  --Info.CallStackDepth;
  Info.CurrentCall = Caller;
}

static bool isRead(AccessKinds AK) {
  return AK == AK_Read || AK == AK_ReadObjectRepresentation;
}

static bool isModification(AccessKinds AK) {
  switch (AK) {
  case AK_Read:
  case AK_ReadObjectRepresentation:
  case AK_MemberCall:
  case AK_DynamicCast:
  case AK_TypeId:
    return false;
  case AK_Assign:
  case AK_Increment:
  case AK_Decrement:
  case AK_Construct:
  case AK_Destroy:
    return true;
  }
  llvm_unreachable("unknown access kind");
}

static bool isAnyAccess(AccessKinds AK) {
  return isRead(AK) || isModification(AK);
}

/// Is this an access per the C++ definition?
static bool isFormalAccess(AccessKinds AK) {
  return isAnyAccess(AK) && AK != AK_Construct && AK != AK_Destroy;
}

/// Is this kind of access valid on an indeterminate object value?
static bool isValidIndeterminateAccess(AccessKinds AK) {
  switch (AK) {
  case AK_Read:
  case AK_Increment:
  case AK_Decrement:
    // These need the object's value.
    return false;

  case AK_ReadObjectRepresentation:
  case AK_Assign:
  case AK_Construct:
  case AK_Destroy:
    // Construction and destruction don't need the value.
    return true;

  case AK_MemberCall:
  case AK_DynamicCast:
  case AK_TypeId:
    // These aren't really meaningful on scalars.
return true; } llvm_unreachable("unknown access kind"); } namespace { struct ComplexValue { private: bool IsInt; public: APSInt IntReal, IntImag; APFloat FloatReal, FloatImag; ComplexValue() : FloatReal(APFloat::Bogus()), FloatImag(APFloat::Bogus()) {} void makeComplexFloat() { IsInt = false; } bool isComplexFloat() const { return !IsInt; } APFloat &getComplexFloatReal() { return FloatReal; } APFloat &getComplexFloatImag() { return FloatImag; } void makeComplexInt() { IsInt = true; } bool isComplexInt() const { return IsInt; } APSInt &getComplexIntReal() { return IntReal; } APSInt &getComplexIntImag() { return IntImag; } void moveInto(APValue &v) const { if (isComplexFloat()) v = APValue(FloatReal, FloatImag); else v = APValue(IntReal, IntImag); } void setFrom(const APValue &v) { assert(v.isComplexFloat() || v.isComplexInt()); if (v.isComplexFloat()) { makeComplexFloat(); FloatReal = v.getComplexFloatReal(); FloatImag = v.getComplexFloatImag(); } else { makeComplexInt(); IntReal = v.getComplexIntReal(); IntImag = v.getComplexIntImag(); } } }; struct LValue { APValue::LValueBase Base; CharUnits Offset; SubobjectDesignator Designator; bool IsNullPtr : 1; bool InvalidBase : 1; const APValue::LValueBase getLValueBase() const { return Base; } CharUnits &getLValueOffset() { return Offset; } const CharUnits &getLValueOffset() const { return Offset; } SubobjectDesignator &getLValueDesignator() { return Designator; } const SubobjectDesignator &getLValueDesignator() const { return Designator;} bool isNullPointer() const { return IsNullPtr;} unsigned getLValueCallIndex() const { return Base.getCallIndex(); } unsigned getLValueVersion() const { return Base.getVersion(); } void moveInto(APValue &V) const { if (Designator.Invalid) V = APValue(Base, Offset, APValue::NoLValuePath(), IsNullPtr); else { assert(!InvalidBase && "APValues can't handle invalid LValue bases"); V = APValue(Base, Offset, Designator.Entries, Designator.IsOnePastTheEnd, IsNullPtr); } } void setFrom(ASTContext &Ctx, const APValue &V) { assert(V.isLValue() && "Setting LValue from a non-LValue?"); Base = V.getLValueBase(); Offset = V.getLValueOffset(); InvalidBase = false; Designator = SubobjectDesignator(Ctx, V); IsNullPtr = V.isNullPointer(); } void set(APValue::LValueBase B, bool BInvalid = false) { #ifndef NDEBUG // We only allow a few types of invalid bases. Enforce that here. if (BInvalid) { const auto *E = B.get(); assert((isa(E) || tryUnwrapAllocSizeCall(E)) && "Unexpected type of invalid base"); } #endif Base = B; Offset = CharUnits::fromQuantity(0); InvalidBase = BInvalid; Designator = SubobjectDesignator(getType(B)); IsNullPtr = false; } void setNull(ASTContext &Ctx, QualType PointerTy) { Base = (Expr *)nullptr; Offset = CharUnits::fromQuantity(Ctx.getTargetNullPointerValue(PointerTy)); InvalidBase = false; Designator = SubobjectDesignator(PointerTy->getPointeeType()); IsNullPtr = true; } void setInvalid(APValue::LValueBase B, unsigned I = 0) { set(B, true); } std::string toString(ASTContext &Ctx, QualType T) const { APValue Printable; moveInto(Printable); return Printable.getAsString(Ctx, T); } private: // Check that this LValue is not based on a null pointer. If it is, produce // a diagnostic and mark the designator as invalid. 
template bool checkNullPointerDiagnosingWith(const GenDiagType &GenDiag) { if (Designator.Invalid) return false; if (IsNullPtr) { GenDiag(); Designator.setInvalid(); return false; } return true; } public: bool checkNullPointer(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { return checkNullPointerDiagnosingWith([&Info, E, CSK] { Info.CCEDiag(E, diag::note_constexpr_null_subobject) << CSK; }); } bool checkNullPointerForFoldAccess(EvalInfo &Info, const Expr *E, AccessKinds AK) { return checkNullPointerDiagnosingWith([&Info, E, AK] { Info.FFDiag(E, diag::note_constexpr_access_null) << AK; }); } // Check this LValue refers to an object. If not, set the designator to be // invalid and emit a diagnostic. bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { return (CSK == CSK_ArrayToPointer || checkNullPointer(Info, E, CSK)) && Designator.checkSubobject(Info, E, CSK); } void addDecl(EvalInfo &Info, const Expr *E, const Decl *D, bool Virtual = false) { if (checkSubobject(Info, E, isa(D) ? CSK_Field : CSK_Base)) Designator.addDeclUnchecked(D, Virtual); } void addUnsizedArray(EvalInfo &Info, const Expr *E, QualType ElemTy) { if (!Designator.Entries.empty()) { Info.CCEDiag(E, diag::note_constexpr_unsupported_unsized_array); Designator.setInvalid(); return; } if (checkSubobject(Info, E, CSK_ArrayToPointer)) { assert(getType(Base)->isPointerType() || getType(Base)->isArrayType()); Designator.FirstEntryIsAnUnsizedArray = true; Designator.addUnsizedArrayUnchecked(ElemTy); } } void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) { if (checkSubobject(Info, E, CSK_ArrayToPointer)) Designator.addArrayUnchecked(CAT); } void addComplex(EvalInfo &Info, const Expr *E, QualType EltTy, bool Imag) { if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real)) Designator.addComplexUnchecked(EltTy, Imag); } void clearIsNullPointer() { IsNullPtr = false; } void adjustOffsetAndIndex(EvalInfo &Info, const Expr *E, const APSInt &Index, CharUnits ElementSize) { // An index of 0 has no effect. (In C, adding 0 to a null pointer is UB, // but we're not required to diagnose it and it's valid in C++.) if (!Index) return; // Compute the new offset in the appropriate width, wrapping at 64 bits. // FIXME: When compiling for a 32-bit target, we should use 32-bit // offsets. uint64_t Offset64 = Offset.getQuantity(); uint64_t ElemSize64 = ElementSize.getQuantity(); uint64_t Index64 = Index.extOrTrunc(64).getZExtValue(); Offset = CharUnits::fromQuantity(Offset64 + ElemSize64 * Index64); if (checkNullPointer(Info, E, CSK_ArrayIndex)) Designator.adjustIndex(Info, E, Index); clearIsNullPointer(); } void adjustOffset(CharUnits N) { Offset += N; if (N.getQuantity()) clearIsNullPointer(); } }; struct MemberPtr { MemberPtr() {} explicit MemberPtr(const ValueDecl *Decl) : DeclAndIsDerivedMember(Decl, false), Path() {} /// The member or (direct or indirect) field referred to by this member /// pointer, or 0 if this is a null member pointer. const ValueDecl *getDecl() const { return DeclAndIsDerivedMember.getPointer(); } /// Is this actually a member of some type derived from the relevant class? bool isDerivedMember() const { return DeclAndIsDerivedMember.getInt(); } /// Get the class which the declaration actually lives in. 
const CXXRecordDecl *getContainingRecord() const { return cast( DeclAndIsDerivedMember.getPointer()->getDeclContext()); } void moveInto(APValue &V) const { V = APValue(getDecl(), isDerivedMember(), Path); } void setFrom(const APValue &V) { assert(V.isMemberPointer()); DeclAndIsDerivedMember.setPointer(V.getMemberPointerDecl()); DeclAndIsDerivedMember.setInt(V.isMemberPointerToDerivedMember()); Path.clear(); ArrayRef P = V.getMemberPointerPath(); Path.insert(Path.end(), P.begin(), P.end()); } /// DeclAndIsDerivedMember - The member declaration, and a flag indicating /// whether the member is a member of some class derived from the class type /// of the member pointer. llvm::PointerIntPair DeclAndIsDerivedMember; /// Path - The path of base/derived classes from the member declaration's /// class (exclusive) to the class type of the member pointer (inclusive). SmallVector Path; /// Perform a cast towards the class of the Decl (either up or down the /// hierarchy). bool castBack(const CXXRecordDecl *Class) { assert(!Path.empty()); const CXXRecordDecl *Expected; if (Path.size() >= 2) Expected = Path[Path.size() - 2]; else Expected = getContainingRecord(); if (Expected->getCanonicalDecl() != Class->getCanonicalDecl()) { // C++11 [expr.static.cast]p12: In a conversion from (D::*) to (B::*), // if B does not contain the original member and is not a base or // derived class of the class containing the original member, the result // of the cast is undefined. // C++11 [conv.mem]p2 does not cover this case for a cast from (B::*) to // (D::*). We consider that to be a language defect. return false; } Path.pop_back(); return true; } /// Perform a base-to-derived member pointer cast. bool castToDerived(const CXXRecordDecl *Derived) { if (!getDecl()) return true; if (!isDerivedMember()) { Path.push_back(Derived); return true; } if (!castBack(Derived)) return false; if (Path.empty()) DeclAndIsDerivedMember.setInt(false); return true; } /// Perform a derived-to-base member pointer cast. bool castToBase(const CXXRecordDecl *Base) { if (!getDecl()) return true; if (Path.empty()) DeclAndIsDerivedMember.setInt(true); if (isDerivedMember()) { Path.push_back(Base); return true; } return castBack(Base); } }; /// Compare two member pointers, which are assumed to be of the same type. 
static bool operator==(const MemberPtr &LHS, const MemberPtr &RHS) { if (!LHS.getDecl() || !RHS.getDecl()) return !LHS.getDecl() && !RHS.getDecl(); if (LHS.getDecl()->getCanonicalDecl() != RHS.getDecl()->getCanonicalDecl()) return false; return LHS.Path == RHS.Path; } } static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E); static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, const Expr *E, bool AllowNonLiteralTypes = false); static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info, bool InvalidBaseOK = false); static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info, bool InvalidBaseOK = false); static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result, EvalInfo &Info); static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info); static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info); static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result, EvalInfo &Info); static bool EvaluateFloat(const Expr *E, APFloat &Result, EvalInfo &Info); static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info); static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, EvalInfo &Info); static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); /// Evaluate an integer or fixed point expression into an APResult. static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, EvalInfo &Info); /// Evaluate only a fixed point expression into an APResult. static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result, EvalInfo &Info); //===----------------------------------------------------------------------===// // Misc utilities //===----------------------------------------------------------------------===// /// Negate an APSInt in place, converting it to a signed form if necessary, and /// preserving its value (by extending by up to one bit as needed). static void negateAsSigned(APSInt &Int) { if (Int.isUnsigned() || Int.isMinSignedValue()) { Int = Int.extend(Int.getBitWidth() + 1); Int.setIsSigned(true); } Int = -Int; } template APValue &CallStackFrame::createTemporary(const KeyT *Key, QualType T, bool IsLifetimeExtended, LValue &LV) { unsigned Version = getTempVersion(); APValue::LValueBase Base(Key, Index, Version); LV.set(Base); APValue &Result = Temporaries[MapKeyTy(Key, Version)]; assert(Result.isAbsent() && "temporary created multiple times"); // If we're creating a temporary immediately in the operand of a speculative // evaluation, don't register a cleanup to be run outside the speculative // evaluation context, since we won't actually be able to initialize this // object. if (Index <= Info.SpeculativeEvaluationDepth) { if (T.isDestructedType()) Info.noteSideEffect(); } else { Info.CleanupStack.push_back(Cleanup(&Result, Base, T, IsLifetimeExtended)); } return Result; } APValue *EvalInfo::createHeapAlloc(const Expr *E, QualType T, LValue &LV) { if (NumHeapAllocs > DynamicAllocLValue::getMaxIndex()) { FFDiag(E, diag::note_constexpr_heap_alloc_limit_exceeded); return nullptr; } DynamicAllocLValue DA(NumHeapAllocs++); LV.set(APValue::LValueBase::getDynamicAlloc(DA, T)); auto Result = HeapAllocs.emplace(std::piecewise_construct, std::forward_as_tuple(DA), std::tuple<>()); assert(Result.second && "reused a heap alloc index?"); Result.first->second.AllocExpr = E; return &Result.first->second.Value; } /// Produce a string describing the given constexpr call. 
void CallStackFrame::describe(raw_ostream &Out) { unsigned ArgIndex = 0; bool IsMemberCall = isa(Callee) && !isa(Callee) && cast(Callee)->isInstance(); if (!IsMemberCall) Out << *Callee << '('; if (This && IsMemberCall) { APValue Val; This->moveInto(Val); Val.printPretty(Out, Info.Ctx, This->Designator.MostDerivedType); // FIXME: Add parens around Val if needed. Out << "->" << *Callee << '('; IsMemberCall = false; } for (FunctionDecl::param_const_iterator I = Callee->param_begin(), E = Callee->param_end(); I != E; ++I, ++ArgIndex) { if (ArgIndex > (unsigned)IsMemberCall) Out << ", "; const ParmVarDecl *Param = *I; const APValue &Arg = Arguments[ArgIndex]; Arg.printPretty(Out, Info.Ctx, Param->getType()); if (ArgIndex == 0 && IsMemberCall) Out << "->" << *Callee << '('; } Out << ')'; } /// Evaluate an expression to see if it had side-effects, and discard its /// result. /// \return \c true if the caller should keep evaluating. static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) { APValue Scratch; if (!Evaluate(Scratch, Info, E)) // We don't need the value, but we might have skipped a side effect here. return Info.noteSideEffect(); return true; } /// Should this call expression be treated as a string literal? static bool IsStringLiteralCall(const CallExpr *E) { unsigned Builtin = E->getBuiltinCallee(); return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || Builtin == Builtin::BI__builtin___NSStringMakeConstantString); } static bool IsGlobalLValue(APValue::LValueBase B) { // C++11 [expr.const]p3 An address constant expression is a prvalue core // constant expression of pointer type that evaluates to... // ... a null pointer value, or a prvalue core constant expression of type // std::nullptr_t. if (!B) return true; if (const ValueDecl *D = B.dyn_cast()) { // ... the address of an object with static storage duration, if (const VarDecl *VD = dyn_cast(D)) return VD->hasGlobalStorage(); // ... the address of a function, // ... the address of a GUID [MS extension], return isa(D) || isa(D); } if (B.is() || B.is()) return true; const Expr *E = B.get(); switch (E->getStmtClass()) { default: return false; case Expr::CompoundLiteralExprClass: { const CompoundLiteralExpr *CLE = cast(E); return CLE->isFileScope() && CLE->isLValue(); } case Expr::MaterializeTemporaryExprClass: // A materialized temporary might have been lifetime-extended to static // storage duration. return cast(E)->getStorageDuration() == SD_Static; // A string literal has static storage duration. case Expr::StringLiteralClass: case Expr::PredefinedExprClass: case Expr::ObjCStringLiteralClass: case Expr::ObjCEncodeExprClass: return true; case Expr::ObjCBoxedExprClass: return cast(E)->isExpressibleAsConstantInitializer(); case Expr::CallExprClass: return IsStringLiteralCall(cast(E)); // For GCC compatibility, &&label has static storage duration. case Expr::AddrLabelExprClass: return true; // A Block literal expression may be used as the initialization value for // Block variables at global or local static scope. case Expr::BlockExprClass: return !cast(E)->getBlockDecl()->hasCaptures(); case Expr::ImplicitValueInitExprClass: // FIXME: // We can never form an lvalue with an implicit value initialization as its // base through expression evaluation, so these only appear in one case: the // implicit variable declaration we invent when checking whether a constexpr // constructor can produce a constant expression. We must assume that such // an expression might be a global lvalue. 
return true; } } static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) { return LVal.Base.dyn_cast(); } static bool IsLiteralLValue(const LValue &Value) { if (Value.getLValueCallIndex()) return false; const Expr *E = Value.Base.dyn_cast(); return E && !isa(E); } static bool IsWeakLValue(const LValue &Value) { const ValueDecl *Decl = GetLValueBaseDecl(Value); return Decl && Decl->isWeak(); } static bool isZeroSized(const LValue &Value) { const ValueDecl *Decl = GetLValueBaseDecl(Value); if (Decl && isa(Decl)) { QualType Ty = Decl->getType(); if (Ty->isArrayType()) return Ty->isIncompleteType() || Decl->getASTContext().getTypeSize(Ty) == 0; } return false; } static bool HasSameBase(const LValue &A, const LValue &B) { if (!A.getLValueBase()) return !B.getLValueBase(); if (!B.getLValueBase()) return false; if (A.getLValueBase().getOpaqueValue() != B.getLValueBase().getOpaqueValue()) { const Decl *ADecl = GetLValueBaseDecl(A); if (!ADecl) return false; const Decl *BDecl = GetLValueBaseDecl(B); if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl()) return false; } return IsGlobalLValue(A.getLValueBase()) || (A.getLValueCallIndex() == B.getLValueCallIndex() && A.getLValueVersion() == B.getLValueVersion()); } static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { assert(Base && "no location for a null lvalue"); const ValueDecl *VD = Base.dyn_cast(); if (VD) Info.Note(VD->getLocation(), diag::note_declared_at); else if (const Expr *E = Base.dyn_cast()) Info.Note(E->getExprLoc(), diag::note_constexpr_temporary_here); else if (DynamicAllocLValue DA = Base.dyn_cast()) { // FIXME: Produce a note for dangling pointers too. if (Optional Alloc = Info.lookupDynamicAlloc(DA)) Info.Note((*Alloc)->AllocExpr->getExprLoc(), diag::note_constexpr_dynamic_alloc_here); } // We have no information to show for a typeid(T) object. } enum class CheckEvaluationResultKind { ConstantExpression, FullyInitialized, }; /// Materialized temporaries that we've already checked to determine if they're /// initialized by a constant expression. using CheckedTemporaries = llvm::SmallPtrSet; static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value, Expr::ConstExprUsage Usage, SourceLocation SubobjectLoc, CheckedTemporaries &CheckedTemps); /// Check that this reference or pointer core constant expression is a valid /// value for an address or reference constant expression. Return true if we /// can fold this expression, whether or not it's a constant expression. static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, QualType Type, const LValue &LVal, Expr::ConstExprUsage Usage, CheckedTemporaries &CheckedTemps) { bool IsReferenceType = Type->isReferenceType(); APValue::LValueBase Base = LVal.getLValueBase(); const SubobjectDesignator &Designator = LVal.getLValueDesignator(); if (auto *VD = LVal.getLValueBase().dyn_cast()) { if (auto *FD = dyn_cast(VD)) { if (FD->isConsteval()) { Info.FFDiag(Loc, diag::note_consteval_address_accessible) << !Type->isAnyPointerType(); Info.Note(FD->getLocation(), diag::note_declared_at); return false; } } } // Check that the object is a global. Note that the fake 'this' object we // manufacture when checking potential constant expressions is conservatively // assumed to be global here.
if (!IsGlobalLValue(Base)) { if (Info.getLangOpts().CPlusPlus11) { const ValueDecl *VD = Base.dyn_cast(); Info.FFDiag(Loc, diag::note_constexpr_non_global, 1) << IsReferenceType << !Designator.Entries.empty() << !!VD << VD; NoteLValueLocation(Info, Base); } else { Info.FFDiag(Loc); } // Don't allow references to temporaries to escape. return false; } assert((Info.checkingPotentialConstantExpression() || LVal.getLValueCallIndex() == 0) && "have call index for global lvalue"); if (Base.is()) { Info.FFDiag(Loc, diag::note_constexpr_dynamic_alloc) << IsReferenceType << !Designator.Entries.empty(); NoteLValueLocation(Info, Base); return false; } if (const ValueDecl *VD = Base.dyn_cast()) { if (const VarDecl *Var = dyn_cast(VD)) { // Check if this is a thread-local variable. if (Var->getTLSKind()) // FIXME: Diagnostic! return false; // A dllimport variable never acts like a constant. if (Usage == Expr::EvaluateForCodeGen && Var->hasAttr()) // FIXME: Diagnostic! return false; } if (const auto *FD = dyn_cast(VD)) { // __declspec(dllimport) must be handled very carefully: // We must never initialize an expression with the thunk in C++. // Doing otherwise would allow the same id-expression to yield // different addresses for the same function in different translation // units. However, this means that we must dynamically initialize the // expression with the contents of the import address table at runtime. // // The C language has no notion of ODR; furthermore, it has no notion of // dynamic initialization. This means that we are permitted to // perform initialization with the address of the thunk. if (Info.getLangOpts().CPlusPlus && Usage == Expr::EvaluateForCodeGen && FD->hasAttr()) // FIXME: Diagnostic! return false; } } else if (const auto *MTE = dyn_cast_or_null( Base.dyn_cast())) { if (CheckedTemps.insert(MTE).second) { QualType TempType = getType(Base); if (TempType.isDestructedType()) { Info.FFDiag(MTE->getExprLoc(), diag::note_constexpr_unsupported_tempoarary_nontrivial_dtor) << TempType; return false; } APValue *V = MTE->getOrCreateValue(false); assert(V && "evasluation result refers to uninitialised temporary"); if (!CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, Info, MTE->getExprLoc(), TempType, *V, Usage, SourceLocation(), CheckedTemps)) return false; } } // Allow address constant expressions to be past-the-end pointers. This is // an extension: the standard requires them to point to an object. if (!IsReferenceType) return true; // A reference constant expression must refer to an object. if (!Base) { // FIXME: diagnostic Info.CCEDiag(Loc); return true; } // Does this refer one past the end of some object? if (!Designator.Invalid && Designator.isOnePastTheEnd()) { const ValueDecl *VD = Base.dyn_cast(); Info.FFDiag(Loc, diag::note_constexpr_past_end, 1) << !Designator.Entries.empty() << !!VD << VD; NoteLValueLocation(Info, Base); } return true; } /// Member pointers are constant expressions unless they point to a /// non-virtual dllimport member function. 
static bool CheckMemberPointerConstantExpression(EvalInfo &Info, SourceLocation Loc, QualType Type, const APValue &Value, Expr::ConstExprUsage Usage) { const ValueDecl *Member = Value.getMemberPointerDecl(); const auto *FD = dyn_cast_or_null(Member); if (!FD) return true; if (FD->isConsteval()) { Info.FFDiag(Loc, diag::note_consteval_address_accessible) << /*pointer*/ 0; Info.Note(FD->getLocation(), diag::note_declared_at); return false; } return Usage == Expr::EvaluateForMangling || FD->isVirtual() || !FD->hasAttr(); } /// Check that this core constant expression is of literal type, and if not, /// produce an appropriate diagnostic. static bool CheckLiteralType(EvalInfo &Info, const Expr *E, const LValue *This = nullptr) { if (!E->isRValue() || E->getType()->isLiteralType(Info.Ctx)) return true; // C++1y: A constant initializer for an object o [...] may also invoke // constexpr constructors for o and its subobjects even if those objects // are of non-literal class types. // // C++11 missed this detail for aggregates, so classes like this: // struct foo_t { union { int i; volatile int j; } u; }; // are not (obviously) initializable like so: // __attribute__((__require_constant_initialization__)) // static const foo_t x = {{0}}; // because "i" is a subobject with non-literal initialization (due to the // volatile member of the union). See: // http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677 // Therefore, we use the C++1y behavior. if (This && Info.EvaluatingDecl == This->getLValueBase()) return true; // Prvalue constant expressions must be of literal types. if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType(); else Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value, Expr::ConstExprUsage Usage, SourceLocation SubobjectLoc, CheckedTemporaries &CheckedTemps) { if (!Value.hasValue()) { Info.FFDiag(DiagLoc, diag::note_constexpr_uninitialized) << true << Type; if (SubobjectLoc.isValid()) Info.Note(SubobjectLoc, diag::note_constexpr_subobject_declared_here); return false; } // We allow _Atomic(T) to be initialized from anything that T can be // initialized from. if (const AtomicType *AT = Type->getAs()) Type = AT->getValueType(); // Core issue 1454: For a literal constant expression of array or class type, // each subobject of its value shall have been initialized by a constant // expression. 
if (Value.isArray()) { QualType EltTy = Type->castAsArrayTypeUnsafe()->getElementType(); for (unsigned I = 0, N = Value.getArrayInitializedElts(); I != N; ++I) { if (!CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, Value.getArrayInitializedElt(I), Usage, SubobjectLoc, CheckedTemps)) return false; } if (!Value.hasArrayFiller()) return true; return CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, Value.getArrayFiller(), Usage, SubobjectLoc, CheckedTemps); } if (Value.isUnion() && Value.getUnionField()) { return CheckEvaluationResult( CERK, Info, DiagLoc, Value.getUnionField()->getType(), Value.getUnionValue(), Usage, Value.getUnionField()->getLocation(), CheckedTemps); } if (Value.isStruct()) { RecordDecl *RD = Type->castAs()->getDecl(); if (const CXXRecordDecl *CD = dyn_cast(RD)) { unsigned BaseIndex = 0; for (const CXXBaseSpecifier &BS : CD->bases()) { if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), Value.getStructBase(BaseIndex), Usage, BS.getBeginLoc(), CheckedTemps)) return false; ++BaseIndex; } } for (const auto *I : RD->fields()) { if (I->isUnnamedBitfield()) continue; if (!CheckEvaluationResult(CERK, Info, DiagLoc, I->getType(), Value.getStructField(I->getFieldIndex()), Usage, I->getLocation(), CheckedTemps)) return false; } } if (Value.isLValue() && CERK == CheckEvaluationResultKind::ConstantExpression) { LValue LVal; LVal.setFrom(Info.Ctx, Value); return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal, Usage, CheckedTemps); } if (Value.isMemberPointer() && CERK == CheckEvaluationResultKind::ConstantExpression) return CheckMemberPointerConstantExpression(Info, DiagLoc, Type, Value, Usage); // Everything else is fine. return true; } /// Check that this core constant expression value is a valid value for a /// constant expression. If not, report an appropriate diagnostic. Does not /// check that the expression is of literal type. static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value, Expr::ConstExprUsage Usage = Expr::EvaluateForCodeGen) { CheckedTemporaries CheckedTemps; return CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, Info, DiagLoc, Type, Value, Usage, SourceLocation(), CheckedTemps); } /// Check that this evaluated value is fully-initialized and can be loaded by /// an lvalue-to-rvalue conversion. static bool CheckFullyInitialized(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value) { CheckedTemporaries CheckedTemps; return CheckEvaluationResult( CheckEvaluationResultKind::FullyInitialized, Info, DiagLoc, Type, Value, Expr::EvaluateForCodeGen, SourceLocation(), CheckedTemps); } /// Enforce C++2a [expr.const]/4.17, which disallows new-expressions unless /// "the allocated storage is deallocated within the evaluation". static bool CheckMemoryLeaks(EvalInfo &Info) { if (!Info.HeapAllocs.empty()) { // We can still fold to a constant despite a compile-time memory leak, // so long as the heap allocation isn't referenced in the result (we check // that in CheckConstantExpression). Info.CCEDiag(Info.HeapAllocs.begin()->second.AllocExpr, diag::note_constexpr_memory_leak) << unsigned(Info.HeapAllocs.size() - 1); } return true; } static bool EvalPointerValueAsBool(const APValue &Value, bool &Result) { // A null base expression indicates a null pointer. These are always // evaluatable, and they are false unless the offset is zero. if (!Value.getLValueBase()) { Result = !Value.getLValueOffset().isZero(); return true; } // We have a non-null base. 
// These are generally known to be true, but if it's a weak declaration it can be null at runtime. Result = true; const ValueDecl *Decl = Value.getLValueBase().dyn_cast(); return !Decl || !Decl->isWeak(); } static bool HandleConversionToBool(const APValue &Val, bool &Result) { switch (Val.getKind()) { case APValue::None: case APValue::Indeterminate: return false; case APValue::Int: Result = Val.getInt().getBoolValue(); return true; case APValue::FixedPoint: Result = Val.getFixedPoint().getBoolValue(); return true; case APValue::Float: Result = !Val.getFloat().isZero(); return true; case APValue::ComplexInt: Result = Val.getComplexIntReal().getBoolValue() || Val.getComplexIntImag().getBoolValue(); return true; case APValue::ComplexFloat: Result = !Val.getComplexFloatReal().isZero() || !Val.getComplexFloatImag().isZero(); return true; case APValue::LValue: return EvalPointerValueAsBool(Val, Result); case APValue::MemberPointer: Result = Val.getMemberPointerDecl(); return true; case APValue::Vector: case APValue::Array: case APValue::Struct: case APValue::Union: case APValue::AddrLabelDiff: return false; } llvm_unreachable("unknown APValue kind"); } static bool EvaluateAsBooleanCondition(const Expr *E, bool &Result, EvalInfo &Info) { assert(E->isRValue() && "missing lvalue-to-rvalue conv in bool condition"); APValue Val; if (!Evaluate(Val, Info, E)) return false; return HandleConversionToBool(Val, Result); } template static bool HandleOverflow(EvalInfo &Info, const Expr *E, const T &SrcValue, QualType DestType) { Info.CCEDiag(E, diag::note_constexpr_overflow) << SrcValue << DestType; return Info.noteUndefinedBehavior(); } static bool HandleFloatToIntCast(EvalInfo &Info, const Expr *E, QualType SrcType, const APFloat &Value, QualType DestType, APSInt &Result) { unsigned DestWidth = Info.Ctx.getIntWidth(DestType); // Determine whether we are converting to unsigned or signed. bool DestSigned = DestType->isSignedIntegerOrEnumerationType(); Result = APSInt(DestWidth, !DestSigned); bool ignored; if (Value.convertToInteger(Result, llvm::APFloat::rmTowardZero, &ignored) & APFloat::opInvalidOp) return HandleOverflow(Info, E, Value, DestType); return true; } static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E, QualType SrcType, QualType DestType, APFloat &Result) { APFloat Value = Result; bool ignored; Result.convert(Info.Ctx.getFloatTypeSemantics(DestType), APFloat::rmNearestTiesToEven, &ignored); return true; } static APSInt HandleIntToIntCast(EvalInfo &Info, const Expr *E, QualType DestType, QualType SrcType, const APSInt &Value) { unsigned DestWidth = Info.Ctx.getIntWidth(DestType); // Figure out if this is a truncate, extend or noop cast. // If the input is signed, do a sign extend, noop, or truncate. APSInt Result = Value.extOrTrunc(DestWidth); Result.setIsUnsigned(DestType->isUnsignedIntegerOrEnumerationType()); if (DestType->isBooleanType()) Result = Value.getBoolValue(); return Result; } static bool HandleIntToFloatCast(EvalInfo &Info, const Expr *E, QualType SrcType, const APSInt &Value, QualType DestType, APFloat &Result) { Result = APFloat(Info.Ctx.getFloatTypeSemantics(DestType), 1); Result.convertFromAPInt(Value, Value.isSigned(), APFloat::rmNearestTiesToEven); return true; } static bool truncateBitfieldValue(EvalInfo &Info, const Expr *E, APValue &Value, const FieldDecl *FD) { assert(FD->isBitField() && "truncateBitfieldValue on non-bitfield"); if (!Value.isInt()) { // Trying to store a pointer-cast-to-integer into a bitfield.
// FIXME: In this case, we should provide the diagnostic for casting // a pointer to an integer. assert(Value.isLValue() && "integral value neither int nor lvalue?"); Info.FFDiag(E); return false; } APSInt &Int = Value.getInt(); unsigned OldBitWidth = Int.getBitWidth(); unsigned NewBitWidth = FD->getBitWidthValue(Info.Ctx); if (NewBitWidth < OldBitWidth) Int = Int.trunc(NewBitWidth).extend(OldBitWidth); return true; } static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E, llvm::APInt &Res) { APValue SVal; if (!Evaluate(SVal, Info, E)) return false; if (SVal.isInt()) { Res = SVal.getInt(); return true; } if (SVal.isFloat()) { Res = SVal.getFloat().bitcastToAPInt(); return true; } if (SVal.isVector()) { QualType VecTy = E->getType(); unsigned VecSize = Info.Ctx.getTypeSize(VecTy); QualType EltTy = VecTy->castAs()->getElementType(); unsigned EltSize = Info.Ctx.getTypeSize(EltTy); bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); Res = llvm::APInt::getNullValue(VecSize); for (unsigned i = 0; i < SVal.getVectorLength(); i++) { APValue &Elt = SVal.getVectorElt(i); llvm::APInt EltAsInt; if (Elt.isInt()) { EltAsInt = Elt.getInt(); } else if (Elt.isFloat()) { EltAsInt = Elt.getFloat().bitcastToAPInt(); } else { // Don't try to handle vectors of anything other than int or float // (not sure if it's possible to hit this case). Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } unsigned BaseEltSize = EltAsInt.getBitWidth(); if (BigEndian) Res |= EltAsInt.zextOrTrunc(VecSize).rotr(i*EltSize+BaseEltSize); else Res |= EltAsInt.zextOrTrunc(VecSize).rotl(i*EltSize); } return true; } // Give up if the input isn't an int, float, or vector. For example, we // reject "(v4i16)(intptr_t)&a". Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } /// Perform the given integer operation, which is known to need at most BitWidth /// bits, and check for overflow in the original type (if that type was not an /// unsigned type). template static bool CheckedIntArithmetic(EvalInfo &Info, const Expr *E, const APSInt &LHS, const APSInt &RHS, unsigned BitWidth, Operation Op, APSInt &Result) { if (LHS.isUnsigned()) { Result = Op(LHS, RHS); return true; } APSInt Value(Op(LHS.extend(BitWidth), RHS.extend(BitWidth)), false); Result = Value.trunc(LHS.getBitWidth()); if (Result.extend(BitWidth) != Value) { if (Info.checkingForUndefinedBehavior()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_integer_constant_overflow) << Result.toString(10) << E->getType(); else return HandleOverflow(Info, E, Value, E->getType()); } return true; } /// Perform the given binary integer operation. static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS, BinaryOperatorKind Opcode, APSInt RHS, APSInt &Result) { switch (Opcode) { default: Info.FFDiag(E); return false; case BO_Mul: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() * 2, std::multiplies(), Result); case BO_Add: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1, std::plus(), Result); case BO_Sub: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1, std::minus(), Result); case BO_And: Result = LHS & RHS; return true; case BO_Xor: Result = LHS ^ RHS; return true; case BO_Or: Result = LHS | RHS; return true; case BO_Div: case BO_Rem: if (RHS == 0) { Info.FFDiag(E, diag::note_expr_divide_by_zero); return false; } Result = (Opcode == BO_Rem ? LHS % RHS : LHS / RHS); // Check for overflow case: INT_MIN / -1 or INT_MIN % -1. 
// APSInt supports this operation and gives the two's complement result. if (RHS.isNegative() && RHS.isAllOnesValue() && LHS.isSigned() && LHS.isMinSignedValue()) return HandleOverflow(Info, E, -LHS.extend(LHS.getBitWidth() + 1), E->getType()); return true; case BO_Shl: { if (Info.getLangOpts().OpenCL) // OpenCL 6.3j: shift values are effectively % word size of LHS. RHS &= APSInt(llvm::APInt(RHS.getBitWidth(), static_cast(LHS.getBitWidth() - 1)), RHS.isUnsigned()); else if (RHS.isSigned() && RHS.isNegative()) { // During constant-folding, a negative shift is an opposite shift. Such // a shift is not a constant expression. Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS; RHS = -RHS; goto shift_right; } shift_left: // C++11 [expr.shift]p1: Shift width must be less than the bit width of // the shifted type. unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1); if (SA != RHS) { Info.CCEDiag(E, diag::note_constexpr_large_shift) << RHS << E->getType() << LHS.getBitWidth(); } else if (LHS.isSigned() && !Info.getLangOpts().CPlusPlus20) { // C++11 [expr.shift]p2: A signed left shift must have a non-negative // operand, and must not overflow the corresponding unsigned type. // C++2a [expr.shift]p2: E1 << E2 is the unique value congruent to // E1 x 2^E2 modulo 2^N. if (LHS.isNegative()) Info.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << LHS; else if (LHS.countLeadingZeros() < SA) Info.CCEDiag(E, diag::note_constexpr_lshift_discards); } Result = LHS << SA; return true; } case BO_Shr: { if (Info.getLangOpts().OpenCL) // OpenCL 6.3j: shift values are effectively % word size of LHS. RHS &= APSInt(llvm::APInt(RHS.getBitWidth(), static_cast(LHS.getBitWidth() - 1)), RHS.isUnsigned()); else if (RHS.isSigned() && RHS.isNegative()) { // During constant-folding, a negative shift is an opposite shift. Such a // shift is not a constant expression. Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS; RHS = -RHS; goto shift_left; } shift_right: // C++11 [expr.shift]p1: Shift width must be less than the bit width of the // shifted type. unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1); if (SA != RHS) Info.CCEDiag(E, diag::note_constexpr_large_shift) << RHS << E->getType() << LHS.getBitWidth(); Result = LHS >> SA; return true; } case BO_LT: Result = LHS < RHS; return true; case BO_GT: Result = LHS > RHS; return true; case BO_LE: Result = LHS <= RHS; return true; case BO_GE: Result = LHS >= RHS; return true; case BO_EQ: Result = LHS == RHS; return true; case BO_NE: Result = LHS != RHS; return true; case BO_Cmp: llvm_unreachable("BO_Cmp should be handled elsewhere"); } } /// Perform the given binary floating-point operation, in-place, on LHS. static bool handleFloatFloatBinOp(EvalInfo &Info, const Expr *E, APFloat &LHS, BinaryOperatorKind Opcode, const APFloat &RHS) { switch (Opcode) { default: Info.FFDiag(E); return false; case BO_Mul: LHS.multiply(RHS, APFloat::rmNearestTiesToEven); break; case BO_Add: LHS.add(RHS, APFloat::rmNearestTiesToEven); break; case BO_Sub: LHS.subtract(RHS, APFloat::rmNearestTiesToEven); break; case BO_Div: // [expr.mul]p4: // If the second operand of / or % is zero the behavior is undefined. if (RHS.isZero()) Info.CCEDiag(E, diag::note_expr_divide_by_zero); LHS.divide(RHS, APFloat::rmNearestTiesToEven); break; } // [expr.pre]p4: // If during the evaluation of an expression, the result is not // mathematically defined [...], the behavior is undefined. // FIXME: C++ rules require us to not conform to IEEE 754 here.
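// Illustrative example (not in the upstream source): an expression such as
//   constexpr double d = 0.0 / 0.0;
// triggers the divide-by-zero note above and, because the computed result is
// a NaN, is also treated as undefined behavior by the NaN check that follows.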
if (LHS.isNaN()) { Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << LHS.isNaN(); return Info.noteUndefinedBehavior(); } return true; } static bool handleLogicalOpForVector(const APInt &LHSValue, BinaryOperatorKind Opcode, const APInt &RHSValue, APInt &Result) { bool LHS = (LHSValue != 0); bool RHS = (RHSValue != 0); if (Opcode == BO_LAnd) Result = LHS && RHS; else Result = LHS || RHS; return true; } static bool handleLogicalOpForVector(const APFloat &LHSValue, BinaryOperatorKind Opcode, const APFloat &RHSValue, APInt &Result) { bool LHS = !LHSValue.isZero(); bool RHS = !RHSValue.isZero(); if (Opcode == BO_LAnd) Result = LHS && RHS; else Result = LHS || RHS; return true; } static bool handleLogicalOpForVector(const APValue &LHSValue, BinaryOperatorKind Opcode, const APValue &RHSValue, APInt &Result) { // The result is always an int type, however operands match the first. if (LHSValue.getKind() == APValue::Int) return handleLogicalOpForVector(LHSValue.getInt(), Opcode, RHSValue.getInt(), Result); assert(LHSValue.getKind() == APValue::Float && "Should be no other options"); return handleLogicalOpForVector(LHSValue.getFloat(), Opcode, RHSValue.getFloat(), Result); } template static bool handleCompareOpForVectorHelper(const APTy &LHSValue, BinaryOperatorKind Opcode, const APTy &RHSValue, APInt &Result) { switch (Opcode) { default: llvm_unreachable("unsupported binary operator"); case BO_EQ: Result = (LHSValue == RHSValue); break; case BO_NE: Result = (LHSValue != RHSValue); break; case BO_LT: Result = (LHSValue < RHSValue); break; case BO_GT: Result = (LHSValue > RHSValue); break; case BO_LE: Result = (LHSValue <= RHSValue); break; case BO_GE: Result = (LHSValue >= RHSValue); break; } return true; } static bool handleCompareOpForVector(const APValue &LHSValue, BinaryOperatorKind Opcode, const APValue &RHSValue, APInt &Result) { // The result is always an int type, however operands match the first. if (LHSValue.getKind() == APValue::Int) return handleCompareOpForVectorHelper(LHSValue.getInt(), Opcode, RHSValue.getInt(), Result); assert(LHSValue.getKind() == APValue::Float && "Should be no other options"); return handleCompareOpForVectorHelper(LHSValue.getFloat(), Opcode, RHSValue.getFloat(), Result); } // Perform binary operations for vector types, in place on the LHS. static bool handleVectorVectorBinOp(EvalInfo &Info, const Expr *E, BinaryOperatorKind Opcode, APValue &LHSValue, const APValue &RHSValue) { assert(Opcode != BO_PtrMemD && Opcode != BO_PtrMemI && "Operation not supported on vector types"); const auto *VT = E->getType()->castAs(); unsigned NumElements = VT->getNumElements(); QualType EltTy = VT->getElementType(); // In the cases (typically C as I've observed) where we aren't evaluating // constexpr but are checking for cases where the LHS isn't yet evaluatable, // just give up. 
if (!LHSValue.isVector()) { assert(LHSValue.isLValue() && "A vector result that isn't a vector OR uncalculated LValue"); Info.FFDiag(E); return false; } assert(LHSValue.getVectorLength() == NumElements && RHSValue.getVectorLength() == NumElements && "Different vector sizes"); SmallVector ResultElements; for (unsigned EltNum = 0; EltNum < NumElements; ++EltNum) { APValue LHSElt = LHSValue.getVectorElt(EltNum); APValue RHSElt = RHSValue.getVectorElt(EltNum); if (EltTy->isIntegerType()) { APSInt EltResult{Info.Ctx.getIntWidth(EltTy), EltTy->isUnsignedIntegerType()}; bool Success = true; if (BinaryOperator::isLogicalOp(Opcode)) Success = handleLogicalOpForVector(LHSElt, Opcode, RHSElt, EltResult); else if (BinaryOperator::isComparisonOp(Opcode)) Success = handleCompareOpForVector(LHSElt, Opcode, RHSElt, EltResult); else Success = handleIntIntBinOp(Info, E, LHSElt.getInt(), Opcode, RHSElt.getInt(), EltResult); if (!Success) { Info.FFDiag(E); return false; } ResultElements.emplace_back(EltResult); } else if (EltTy->isFloatingType()) { assert(LHSElt.getKind() == APValue::Float && RHSElt.getKind() == APValue::Float && "Mismatched LHS/RHS/Result Type"); APFloat LHSFloat = LHSElt.getFloat(); if (!handleFloatFloatBinOp(Info, E, LHSFloat, Opcode, RHSElt.getFloat())) { Info.FFDiag(E); return false; } ResultElements.emplace_back(LHSFloat); } } LHSValue = APValue(ResultElements.data(), ResultElements.size()); return true; } /// Cast an lvalue referring to a base subobject to a derived class, by /// truncating the lvalue's path to the given length. static bool CastToDerivedClass(EvalInfo &Info, const Expr *E, LValue &Result, const RecordDecl *TruncatedType, unsigned TruncatedElements) { SubobjectDesignator &D = Result.Designator; // Check we actually point to a derived class object. if (TruncatedElements == D.Entries.size()) return true; assert(TruncatedElements >= D.MostDerivedPathLength && "not casting to a derived class"); if (!Result.checkSubobject(Info, E, CSK_Derived)) return false; // Truncate the path to the subobject, and remove any derived-to-base offsets. const RecordDecl *RD = TruncatedType; for (unsigned I = TruncatedElements, N = D.Entries.size(); I != N; ++I) { if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); const CXXRecordDecl *Base = getAsBaseClass(D.Entries[I]); if (isVirtualBaseClass(D.Entries[I])) Result.Offset -= Layout.getVBaseClassOffset(Base); else Result.Offset -= Layout.getBaseClassOffset(Base); RD = Base; } D.Entries.resize(TruncatedElements); return true; } static bool HandleLValueDirectBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXRecordDecl *Derived, const CXXRecordDecl *Base, const ASTRecordLayout *RL = nullptr) { if (!RL) { if (Derived->isInvalidDecl()) return false; RL = &Info.Ctx.getASTRecordLayout(Derived); } Obj.getLValueOffset() += RL->getBaseClassOffset(Base); Obj.addDecl(Info, E, Base, /*Virtual*/ false); return true; } static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXRecordDecl *DerivedDecl, const CXXBaseSpecifier *Base) { const CXXRecordDecl *BaseDecl = Base->getType()->getAsCXXRecordDecl(); if (!Base->isVirtual()) return HandleLValueDirectBase(Info, E, Obj, DerivedDecl, BaseDecl); SubobjectDesignator &D = Obj.Designator; if (D.Invalid) return false; // Extract most-derived object and corresponding type. 
DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl(); if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength)) return false; // Find the virtual base class. if (DerivedDecl->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(DerivedDecl); Obj.getLValueOffset() += Layout.getVBaseClassOffset(BaseDecl); Obj.addDecl(Info, E, BaseDecl, /*Virtual*/ true); return true; } static bool HandleLValueBasePath(EvalInfo &Info, const CastExpr *E, QualType Type, LValue &Result) { for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { if (!HandleLValueBase(Info, E, Result, Type->getAsCXXRecordDecl(), *PathI)) return false; Type = (*PathI)->getType(); } return true; } /// Cast an lvalue referring to a derived class to a known base subobject. static bool CastToBaseClass(EvalInfo &Info, const Expr *E, LValue &Result, const CXXRecordDecl *DerivedRD, const CXXRecordDecl *BaseRD) { CXXBasePaths Paths(/*FindAmbiguities=*/false, /*RecordPaths=*/true, /*DetectVirtual=*/false); if (!DerivedRD->isDerivedFrom(BaseRD, Paths)) llvm_unreachable("Class must be derived from the passed in base class!"); for (CXXBasePathElement &Elem : Paths.front()) if (!HandleLValueBase(Info, E, Result, Elem.Class, Elem.Base)) return false; return true; } /// Update LVal to refer to the given field, which must be a member of the type /// currently described by LVal. static bool HandleLValueMember(EvalInfo &Info, const Expr *E, LValue &LVal, const FieldDecl *FD, const ASTRecordLayout *RL = nullptr) { if (!RL) { if (FD->getParent()->isInvalidDecl()) return false; RL = &Info.Ctx.getASTRecordLayout(FD->getParent()); } unsigned I = FD->getFieldIndex(); LVal.adjustOffset(Info.Ctx.toCharUnitsFromBits(RL->getFieldOffset(I))); LVal.addDecl(Info, E, FD); return true; } /// Update LVal to refer to the given indirect field. static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E, LValue &LVal, const IndirectFieldDecl *IFD) { for (const auto *C : IFD->chain()) if (!HandleLValueMember(Info, E, LVal, cast(C))) return false; return true; } /// Get the size of the given type in char units. static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, QualType Type, CharUnits &Size) { // sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc // extension. if (Type->isVoidType() || Type->isFunctionType()) { Size = CharUnits::One(); return true; } if (Type->isDependentType()) { Info.FFDiag(Loc); return false; } if (!Type->isConstantSizeType()) { // sizeof(vla) is not a constantexpr: C99 6.5.3.4p2. // FIXME: Better diagnostic. Info.FFDiag(Loc); return false; } Size = Info.Ctx.getTypeSizeInChars(Type); return true; } /// Update a pointer value to model pointer arithmetic. /// \param Info - Information about the ongoing evaluation. /// \param E - The expression being evaluated, for diagnostic purposes. /// \param LVal - The pointer value to be updated. /// \param EltTy - The pointee type represented by LVal. /// \param Adjustment - The adjustment, in objects of type EltTy, to add. 
static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E, LValue &LVal, QualType EltTy, APSInt Adjustment) { CharUnits SizeOfPointee; if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfPointee)) return false; LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfPointee); return true; } static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E, LValue &LVal, QualType EltTy, int64_t Adjustment) { return HandleLValueArrayAdjustment(Info, E, LVal, EltTy, APSInt::get(Adjustment)); } /// Update an lvalue to refer to a component of a complex number. /// \param Info - Information about the ongoing evaluation. /// \param LVal - The lvalue to be updated. /// \param EltTy - The complex number's component type. /// \param Imag - False for the real component, true for the imaginary. static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E, LValue &LVal, QualType EltTy, bool Imag) { if (Imag) { CharUnits SizeOfComponent; if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfComponent)) return false; LVal.Offset += SizeOfComponent; } LVal.addComplex(Info, E, EltTy, Imag); return true; } /// Try to evaluate the initializer for a variable declaration. /// /// \param Info Information about the ongoing evaluation. /// \param E An expression to be used when printing diagnostics. /// \param VD The variable whose initializer should be obtained. /// \param Frame The frame in which the variable was created. Must be null /// if this variable is not local to the evaluation. /// \param Result Filled in with a pointer to the value of the variable. static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, const VarDecl *VD, CallStackFrame *Frame, APValue *&Result, const LValue *LVal) { // If this is a parameter to an active constexpr function call, perform // argument substitution. if (const ParmVarDecl *PVD = dyn_cast(VD)) { // Assume arguments of a potential constant expression are unknown // constant expressions. if (Info.checkingPotentialConstantExpression()) return false; if (!Frame || !Frame->Arguments) { Info.FFDiag(E, diag::note_constexpr_function_param_value_unknown) << VD; return false; } Result = &Frame->Arguments[PVD->getFunctionScopeIndex()]; return true; } // If this is a local variable, dig out its value. if (Frame) { Result = LVal ? Frame->getTemporary(VD, LVal->getLValueVersion()) : Frame->getCurrentTemporary(VD); if (!Result) { // Assume variables referenced within a lambda's call operator that were // not declared within the call operator are captures and during checking // of a potential constant expression, assume they are unknown constant // expressions. assert(isLambdaCallOperator(Frame->Callee) && (VD->getDeclContext() != Frame->Callee || VD->isInitCapture()) && "missing value for local variable"); if (Info.checkingPotentialConstantExpression()) return false; // FIXME: implement capture evaluation during constant expr evaluation. Info.FFDiag(E->getBeginLoc(), diag::note_unimplemented_constexpr_lambda_feature_ast) << "captures not currently allowed"; return false; } return true; } // Dig out the initializer, and use the declaration which it's attached to. // FIXME: We should eventually check whether the variable has a reachable // initializing declaration. const Expr *Init = VD->getAnyInitializer(VD); if (!Init) { // Don't diagnose during potential constant expression checking; an // initializer might be added later. 
if (!Info.checkingPotentialConstantExpression()) { Info.FFDiag(E, diag::note_constexpr_var_init_unknown, 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } return false; } if (Init->isValueDependent()) { // The DeclRefExpr is not value-dependent, but the variable it refers to // has a value-dependent initializer. This should only happen in // constant-folding cases, where the variable is not actually of a suitable // type for use in a constant expression (otherwise the DeclRefExpr would // have been value-dependent too), so diagnose that. assert(!VD->mightBeUsableInConstantExpressions(Info.Ctx)); if (!Info.checkingPotentialConstantExpression()) { Info.FFDiag(E, Info.getLangOpts().CPlusPlus11 ? diag::note_constexpr_ltor_non_constexpr : diag::note_constexpr_ltor_non_integral, 1) << VD << VD->getType(); Info.Note(VD->getLocation(), diag::note_declared_at); } return false; } // If we're currently evaluating the initializer of this declaration, use that // in-flight value. if (Info.EvaluatingDecl.dyn_cast() == VD) { Result = Info.EvaluatingDeclValue; return true; } // Check that we can fold the initializer. In C++, we will have already done // this in the cases where it matters for conformance. SmallVector Notes; if (!VD->evaluateValue(Notes)) { Info.FFDiag(E, diag::note_constexpr_var_init_non_constant, Notes.size() + 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); Info.addNotes(Notes); return false; } // Check that the variable is actually usable in constant expressions. if (!VD->checkInitIsICE()) { Info.CCEDiag(E, diag::note_constexpr_var_init_non_constant, Notes.size() + 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); Info.addNotes(Notes); } // Never use the initializer of a weak variable, not even for constant // folding. We can't be sure that this is the definition that will be used. if (VD->isWeak()) { Info.FFDiag(E, diag::note_constexpr_var_init_weak) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); return false; } Result = VD->getEvaluatedValue(); return true; } static bool IsConstNonVolatile(QualType T) { Qualifiers Quals = T.getQualifiers(); return Quals.hasConst() && !Quals.hasVolatile(); } /// Get the base index of the given base class within an APValue representing /// the given derived class. static unsigned getBaseIndex(const CXXRecordDecl *Derived, const CXXRecordDecl *Base) { Base = Base->getCanonicalDecl(); unsigned Index = 0; for (CXXRecordDecl::base_class_const_iterator I = Derived->bases_begin(), E = Derived->bases_end(); I != E; ++I, ++Index) { if (I->getType()->getAsCXXRecordDecl()->getCanonicalDecl() == Base) return Index; } llvm_unreachable("base class missing from derived class's bases list"); } /// Extract the value of a character from a string literal. 
static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit, uint64_t Index) { assert(!isa(Lit) && "SourceLocExpr should have already been converted to a StringLiteral"); // FIXME: Support MakeStringConstant if (const auto *ObjCEnc = dyn_cast(Lit)) { std::string Str; Info.Ctx.getObjCEncodingForType(ObjCEnc->getEncodedType(), Str); assert(Index <= Str.size() && "Index too large"); return APSInt::getUnsigned(Str.c_str()[Index]); } if (auto PE = dyn_cast(Lit)) Lit = PE->getFunctionName(); const StringLiteral *S = cast(Lit); const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(S->getType()); assert(CAT && "string literal isn't an array"); QualType CharType = CAT->getElementType(); assert(CharType->isIntegerType() && "unexpected character type"); APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(), CharType->isUnsignedIntegerType()); if (Index < S->getLength()) Value = S->getCodeUnit(Index); return Value; } // Expand a string literal into an array of characters. // // FIXME: This is inefficient; we should probably introduce something similar // to the LLVM ConstantDataArray to make this cheaper. static void expandStringLiteral(EvalInfo &Info, const StringLiteral *S, APValue &Result, QualType AllocType = QualType()) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType( AllocType.isNull() ? S->getType() : AllocType); assert(CAT && "string literal isn't an array"); QualType CharType = CAT->getElementType(); assert(CharType->isIntegerType() && "unexpected character type"); unsigned Elts = CAT->getSize().getZExtValue(); Result = APValue(APValue::UninitArray(), std::min(S->getLength(), Elts), Elts); APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(), CharType->isUnsignedIntegerType()); if (Result.hasArrayFiller()) Result.getArrayFiller() = APValue(Value); for (unsigned I = 0, N = Result.getArrayInitializedElts(); I != N; ++I) { Value = S->getCodeUnit(I); Result.getArrayInitializedElt(I) = APValue(Value); } } // Expand an array so that it has more than Index filled elements. static void expandArray(APValue &Array, unsigned Index) { unsigned Size = Array.getArraySize(); assert(Index < Size); // Always at least double the number of elements for which we store a value. unsigned OldElts = Array.getArrayInitializedElts(); unsigned NewElts = std::max(Index+1, OldElts * 2); NewElts = std::min(Size, std::max(NewElts, 8u)); // Copy the data across. APValue NewValue(APValue::UninitArray(), NewElts, Size); for (unsigned I = 0; I != OldElts; ++I) NewValue.getArrayInitializedElt(I).swap(Array.getArrayInitializedElt(I)); for (unsigned I = OldElts; I != NewElts; ++I) NewValue.getArrayInitializedElt(I) = Array.getArrayFiller(); if (NewValue.hasArrayFiller()) NewValue.getArrayFiller() = Array.getArrayFiller(); Array.swap(NewValue); } /// Determine whether a type would actually be read by an lvalue-to-rvalue /// conversion. If it's of class type, we may assume that the copy operation /// is trivial. Note that this is never true for a union type with fields /// (because the copy always "reads" the active member) and always true for /// a non-class type. 
static bool isReadByLvalueToRvalueConversion(const CXXRecordDecl *RD); static bool isReadByLvalueToRvalueConversion(QualType T) { CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); return !RD || isReadByLvalueToRvalueConversion(RD); } static bool isReadByLvalueToRvalueConversion(const CXXRecordDecl *RD) { // FIXME: A trivial copy of a union copies the object representation, even if // the union is empty. if (RD->isUnion()) return !RD->field_empty(); if (RD->isEmpty()) return false; for (auto *Field : RD->fields()) if (!Field->isUnnamedBitfield() && isReadByLvalueToRvalueConversion(Field->getType())) return true; for (auto &BaseSpec : RD->bases()) if (isReadByLvalueToRvalueConversion(BaseSpec.getType())) return true; return false; } /// Diagnose an attempt to read from any unreadable field within the specified /// type, which might be a class type. static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK, QualType T) { CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); if (!RD) return false; if (!RD->hasMutableFields()) return false; for (auto *Field : RD->fields()) { // If we're actually going to read this field in some way, then it can't // be mutable. If we're in a union, then assigning to a mutable field // (even an empty one) can change the active member, so that's not OK. // FIXME: Add core issue number for the union case. if (Field->isMutable() && (RD->isUnion() || isReadByLvalueToRvalueConversion(Field->getType()))) { Info.FFDiag(E, diag::note_constexpr_access_mutable, 1) << AK << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return true; } if (diagnoseMutableFields(Info, E, AK, Field->getType())) return true; } for (auto &BaseSpec : RD->bases()) if (diagnoseMutableFields(Info, E, AK, BaseSpec.getType())) return true; // All mutable fields were empty, and thus not actually read. return false; } static bool lifetimeStartedInEvaluation(EvalInfo &Info, APValue::LValueBase Base, bool MutableSubobject = false) { // A temporary we created. if (Base.getCallIndex()) return true; auto *Evaluating = Info.EvaluatingDecl.dyn_cast(); if (!Evaluating) return false; auto *BaseD = Base.dyn_cast(); switch (Info.IsEvaluatingDecl) { case EvalInfo::EvaluatingDeclKind::None: return false; case EvalInfo::EvaluatingDeclKind::Ctor: // The variable whose initializer we're evaluating. if (BaseD) return declaresSameEntity(Evaluating, BaseD); // A temporary lifetime-extended by the variable whose initializer we're // evaluating. if (auto *BaseE = Base.dyn_cast()) if (auto *BaseMTE = dyn_cast(BaseE)) return declaresSameEntity(BaseMTE->getExtendingDecl(), Evaluating); return false; case EvalInfo::EvaluatingDeclKind::Dtor: // C++2a [expr.const]p6: // [during constant destruction] the lifetime of a and its non-mutable // subobjects (but not its mutable subobjects) [are] considered to start // within e. // // FIXME: We can meaningfully extend this to cover non-const objects, but // we will need special handling: we should be able to access only // subobjects of such objects that are themselves declared const. if (!BaseD || !(BaseD->getType().isConstQualified() || BaseD->getType()->isReferenceType()) || MutableSubobject) return false; return declaresSameEntity(Evaluating, BaseD); } llvm_unreachable("unknown evaluating decl kind"); } namespace { /// A handle to a complete object (an object that is not a subobject of /// another object). struct CompleteObject { /// The identity of the object. 
APValue::LValueBase Base; /// The value of the complete object. APValue *Value; /// The type of the complete object. QualType Type; CompleteObject() : Value(nullptr) {} CompleteObject(APValue::LValueBase Base, APValue *Value, QualType Type) : Base(Base), Value(Value), Type(Type) {} bool mayAccessMutableMembers(EvalInfo &Info, AccessKinds AK) const { // If this isn't a "real" access (eg, if it's just accessing the type // info), allow it. We assume the type doesn't change dynamically for // subobjects of constexpr objects (even though we'd hit UB here if it // did). FIXME: Is this right? if (!isAnyAccess(AK)) return true; // In C++14 onwards, it is permitted to read a mutable member whose // lifetime began within the evaluation. // FIXME: Should we also allow this in C++11? if (!Info.getLangOpts().CPlusPlus14) return false; return lifetimeStartedInEvaluation(Info, Base, /*MutableSubobject*/true); } explicit operator bool() const { return !Type.isNull(); } }; } // end anonymous namespace static QualType getSubobjectType(QualType ObjType, QualType SubobjType, bool IsMutable = false) { // C++ [basic.type.qualifier]p1: // - A const object is an object of type const T or a non-mutable subobject // of a const object. if (ObjType.isConstQualified() && !IsMutable) SubobjType.addConst(); // - A volatile object is an object of type const T or a subobject of a // volatile object. if (ObjType.isVolatileQualified()) SubobjType.addVolatile(); return SubobjType; } /// Find the designated sub-object of an rvalue. template typename SubobjectHandler::result_type findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, SubobjectHandler &handler) { if (Sub.Invalid) // A diagnostic will have already been produced. return handler.failed(); if (Sub.isOnePastTheEnd() || Sub.isMostDerivedAnUnsizedArray()) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, Sub.isOnePastTheEnd() ? diag::note_constexpr_access_past_end : diag::note_constexpr_access_unsized_array) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } APValue *O = Obj.Value; QualType ObjType = Obj.Type; const FieldDecl *LastField = nullptr; const FieldDecl *VolatileField = nullptr; // Walk the designator's path to find the subobject. for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) { // Reading an indeterminate value is undefined, but assigning over one is OK. if ((O->isAbsent() && !(handler.AccessKind == AK_Construct && I == N)) || (O->isIndeterminate() && !isValidIndeterminateAccess(handler.AccessKind))) { if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_constexpr_access_uninit) << handler.AccessKind << O->isIndeterminate(); return handler.failed(); } // C++ [class.ctor]p5, C++ [class.dtor]p5: // const and volatile semantics are not applied on an object under // {con,de}struction. if ((ObjType.isConstQualified() || ObjType.isVolatileQualified()) && ObjType->isRecordType() && Info.isEvaluatingCtorDtor( Obj.Base, llvm::makeArrayRef(Sub.Entries.begin(), Sub.Entries.begin() + I)) != ConstructionPhase::None) { ObjType = Info.Ctx.getCanonicalType(ObjType); ObjType.removeLocalConst(); ObjType.removeLocalVolatile(); } // If this is our last pass, check that the final object type is OK. if (I == N || (I == N - 1 && ObjType->isAnyComplexType())) { // Accesses to volatile objects are prohibited. 
if (ObjType.isVolatileQualified() && isFormalAccess(handler.AccessKind)) { if (Info.getLangOpts().CPlusPlus) { int DiagKind; SourceLocation Loc; const NamedDecl *Decl = nullptr; if (VolatileField) { DiagKind = 2; Loc = VolatileField->getLocation(); Decl = VolatileField; } else if (auto *VD = Obj.Base.dyn_cast()) { DiagKind = 1; Loc = VD->getLocation(); Decl = VD; } else { DiagKind = 0; if (auto *E = Obj.Base.dyn_cast()) Loc = E->getExprLoc(); } Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1) << handler.AccessKind << DiagKind << Decl; Info.Note(Loc, diag::note_constexpr_volatile_here) << DiagKind; } else { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); } return handler.failed(); } // If we are reading an object of class type, there may still be more // things we need to check: if there are any mutable subobjects, we // cannot perform this read. (This only happens when performing a trivial // copy or assignment.) if (ObjType->isRecordType() && !Obj.mayAccessMutableMembers(Info, handler.AccessKind) && diagnoseMutableFields(Info, E, handler.AccessKind, ObjType)) return handler.failed(); } if (I == N) { if (!handler.found(*O, ObjType)) return false; // If we modified a bit-field, truncate it to the right width. if (isModification(handler.AccessKind) && LastField && LastField->isBitField() && !truncateBitfieldValue(Info, E, *O, LastField)) return false; return true; } LastField = nullptr; if (ObjType->isArrayType()) { // Next subobject is an array element. const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(ObjType); assert(CAT && "vla in literal type?"); uint64_t Index = Sub.Entries[I].getAsArrayIndex(); if (CAT->getSize().ule(Index)) { // Note, it should not be possible to form a pointer with a valid // designator which points more than one past the end of the array. if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_access_past_end) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } ObjType = CAT->getElementType(); if (O->getArrayInitializedElts() > Index) O = &O->getArrayInitializedElt(Index); else if (!isRead(handler.AccessKind)) { expandArray(*O, Index); O = &O->getArrayInitializedElt(Index); } else O = &O->getArrayFiller(); } else if (ObjType->isAnyComplexType()) { // Next subobject is a complex number. uint64_t Index = Sub.Entries[I].getAsArrayIndex(); if (Index > 1) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_access_past_end) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } ObjType = getSubobjectType( ObjType, ObjType->castAs()->getElementType()); assert(I == N - 1 && "extracting subobject of scalar?"); if (O->isComplexInt()) { return handler.found(Index ? O->getComplexIntImag() : O->getComplexIntReal(), ObjType); } else { assert(O->isComplexFloat()); return handler.found(Index ? O->getComplexFloatImag() : O->getComplexFloatReal(), ObjType); } } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) { if (Field->isMutable() && !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) { Info.FFDiag(E, diag::note_constexpr_access_mutable, 1) << handler.AccessKind << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return handler.failed(); } // Next subobject is a class, struct or union field. 
RecordDecl *RD = ObjType->castAs()->getDecl(); if (RD->isUnion()) { const FieldDecl *UnionField = O->getUnionField(); if (!UnionField || UnionField->getCanonicalDecl() != Field->getCanonicalDecl()) { if (I == N - 1 && handler.AccessKind == AK_Construct) { // Placement new onto an inactive union member makes it active. O->setUnion(Field, APValue()); } else { // FIXME: If O->getUnionValue() is absent, report that there's no // active union member rather than reporting the prior active union // member. We'll need to fix nullptr_t to not use APValue() as its // representation first. Info.FFDiag(E, diag::note_constexpr_access_inactive_union_member) << handler.AccessKind << Field << !UnionField << UnionField; return handler.failed(); } } O = &O->getUnionValue(); } else O = &O->getStructField(Field->getFieldIndex()); ObjType = getSubobjectType(ObjType, Field->getType(), Field->isMutable()); LastField = Field; if (Field->getType().isVolatileQualified()) VolatileField = Field; } else { // Next subobject is a base class. const CXXRecordDecl *Derived = ObjType->getAsCXXRecordDecl(); const CXXRecordDecl *Base = getAsBaseClass(Sub.Entries[I]); O = &O->getStructBase(getBaseIndex(Derived, Base)); ObjType = getSubobjectType(ObjType, Info.Ctx.getRecordType(Base)); } } } namespace { struct ExtractSubobjectHandler { EvalInfo &Info; const Expr *E; APValue &Result; const AccessKinds AccessKind; typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { Result = Subobj; if (AccessKind == AK_ReadObjectRepresentation) return true; return CheckFullyInitialized(Info, E->getExprLoc(), SubobjType, Result); } bool found(APSInt &Value, QualType SubobjType) { Result = APValue(Value); return true; } bool found(APFloat &Value, QualType SubobjType) { Result = APValue(Value); return true; } }; } // end anonymous namespace /// Extract the designated sub-object of an rvalue. static bool extractSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, APValue &Result, AccessKinds AK = AK_Read) { assert(AK == AK_Read || AK == AK_ReadObjectRepresentation); ExtractSubobjectHandler Handler = {Info, E, Result, AK}; return findSubobject(Info, E, Obj, Sub, Handler); } namespace { struct ModifySubobjectHandler { EvalInfo &Info; APValue &NewVal; const Expr *E; typedef bool result_type; static const AccessKinds AccessKind = AK_Assign; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; // We've been given ownership of NewVal, so just swap it in. Subobj.swap(NewVal); return true; } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!NewVal.isInt()) { // Maybe trying to write a cast pointer value into a complex? Info.FFDiag(E); return false; } Value = NewVal.getInt(); return true; } bool found(APFloat &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; Value = NewVal.getFloat(); return true; } }; } // end anonymous namespace const AccessKinds ModifySubobjectHandler::AccessKind; /// Update the designated sub-object of an rvalue to the given value. 
static bool modifySubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, APValue &NewVal) { ModifySubobjectHandler Handler = { Info, NewVal, E }; return findSubobject(Info, E, Obj, Sub, Handler); } /// Find the position where two subobject designators diverge, or equivalently /// the length of the common initial subsequence. static unsigned FindDesignatorMismatch(QualType ObjType, const SubobjectDesignator &A, const SubobjectDesignator &B, bool &WasArrayIndex) { unsigned I = 0, N = std::min(A.Entries.size(), B.Entries.size()); for (/**/; I != N; ++I) { if (!ObjType.isNull() && (ObjType->isArrayType() || ObjType->isAnyComplexType())) { // Next subobject is an array element. if (A.Entries[I].getAsArrayIndex() != B.Entries[I].getAsArrayIndex()) { WasArrayIndex = true; return I; } if (ObjType->isAnyComplexType()) ObjType = ObjType->castAs()->getElementType(); else ObjType = ObjType->castAsArrayTypeUnsafe()->getElementType(); } else { if (A.Entries[I].getAsBaseOrMember() != B.Entries[I].getAsBaseOrMember()) { WasArrayIndex = false; return I; } if (const FieldDecl *FD = getAsField(A.Entries[I])) // Next subobject is a field. ObjType = FD->getType(); else // Next subobject is a base class. ObjType = QualType(); } } WasArrayIndex = false; return I; } /// Determine whether the given subobject designators refer to elements of the /// same array object. static bool AreElementsOfSameArray(QualType ObjType, const SubobjectDesignator &A, const SubobjectDesignator &B) { if (A.Entries.size() != B.Entries.size()) return false; bool IsArray = A.MostDerivedIsArrayElement; if (IsArray && A.MostDerivedPathLength != A.Entries.size()) // A is a subobject of the array element. return false; // If A (and B) designates an array element, the last entry will be the array // index. That doesn't have to match. Otherwise, we're in the 'implicit array // of length 1' case, and the entire path must match. bool WasArrayIndex; unsigned CommonLength = FindDesignatorMismatch(ObjType, A, B, WasArrayIndex); return CommonLength >= A.Entries.size() - IsArray; } /// Find the complete object to which an LValue refers. static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, AccessKinds AK, const LValue &LVal, QualType LValType) { if (LVal.InvalidBase) { Info.FFDiag(E); return CompleteObject(); } if (!LVal.Base) { Info.FFDiag(E, diag::note_constexpr_access_null) << AK; return CompleteObject(); } CallStackFrame *Frame = nullptr; unsigned Depth = 0; if (LVal.getLValueCallIndex()) { std::tie(Frame, Depth) = Info.getCallFrameAndDepth(LVal.getLValueCallIndex()); if (!Frame) { Info.FFDiag(E, diag::note_constexpr_lifetime_ended, 1) << AK << LVal.Base.is(); NoteLValueLocation(Info, LVal.Base); return CompleteObject(); } } bool IsAccess = isAnyAccess(AK); // C++11 DR1311: An lvalue-to-rvalue conversion on a volatile-qualified type // is not a constant expression (even if the object is non-volatile). We also // apply this rule to C++98, in order to conform to the expected 'volatile' // semantics. if (isFormalAccess(AK) && LValType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) Info.FFDiag(E, diag::note_constexpr_access_volatile_type) << AK << LValType; else Info.FFDiag(E); return CompleteObject(); } // Compute value storage location and type of base object. 
APValue *BaseVal = nullptr; QualType BaseType = getType(LVal.Base); if (const ConstantExpr *CE = dyn_cast_or_null(LVal.Base.dyn_cast())) { /// Nested immediate invocation have been previously removed so if we found /// a ConstantExpr it can only be the EvaluatingDecl. assert(CE->isImmediateInvocation() && CE == Info.EvaluatingDecl); (void)CE; BaseVal = Info.EvaluatingDeclValue; } else if (const ValueDecl *D = LVal.Base.dyn_cast()) { // Allow reading from a GUID declaration. if (auto *GD = dyn_cast(D)) { if (isModification(AK)) { // All the remaining cases do not permit modification of the object. Info.FFDiag(E, diag::note_constexpr_modify_global); return CompleteObject(); } APValue &V = GD->getAsAPValue(); if (V.isAbsent()) { Info.FFDiag(E, diag::note_constexpr_unsupported_layout) << GD->getType(); return CompleteObject(); } return CompleteObject(LVal.Base, &V, GD->getType()); } // In C++98, const, non-volatile integers initialized with ICEs are ICEs. // In C++11, constexpr, non-volatile variables initialized with constant // expressions are constant expressions too. Inside constexpr functions, // parameters are constant expressions even if they're non-const. // In C++1y, objects local to a constant expression (those with a Frame) are // both readable and writable inside constant expressions. // In C, such things can also be folded, although they are not ICEs. const VarDecl *VD = dyn_cast(D); if (VD) { if (const VarDecl *VDef = VD->getDefinition(Info.Ctx)) VD = VDef; } if (!VD || VD->isInvalidDecl()) { Info.FFDiag(E); return CompleteObject(); } // In OpenCL if a variable is in constant address space it is a const value. bool IsConstant = BaseType.isConstQualified() || (Info.getLangOpts().OpenCL && BaseType.getAddressSpace() == LangAS::opencl_constant); // Unless we're looking at a local variable or argument in a constexpr call, // the variable we're reading must be const. if (!Frame) { if (Info.getLangOpts().CPlusPlus14 && lifetimeStartedInEvaluation(Info, LVal.Base)) { // OK, we can read and modify an object if we're in the process of // evaluating its initializer, because its lifetime began in this // evaluation. } else if (isModification(AK)) { // All the remaining cases do not permit modification of the object. Info.FFDiag(E, diag::note_constexpr_modify_global); return CompleteObject(); } else if (VD->isConstexpr()) { // OK, we can read this variable. } else if (BaseType->isIntegralOrEnumerationType()) { // In OpenCL if a variable is in constant address space it is a const // value. if (!IsConstant) { if (!IsAccess) return CompleteObject(LVal.getLValueBase(), nullptr, BaseType); if (Info.getLangOpts().CPlusPlus) { Info.FFDiag(E, diag::note_constexpr_ltor_non_const_int, 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E); } return CompleteObject(); } } else if (!IsAccess) { return CompleteObject(LVal.getLValueBase(), nullptr, BaseType); } else if (IsConstant && Info.checkingPotentialConstantExpression() && BaseType->isLiteralType(Info.Ctx) && !VD->hasDefinition()) { // This variable might end up being constexpr. Don't diagnose it yet. } else if (IsConstant) { // Keep evaluating to see what we can do. In particular, we support // folding of const floating-point types, in order to make static const // data members of such types (supported as an extension) more useful. if (Info.getLangOpts().CPlusPlus) { Info.CCEDiag(E, Info.getLangOpts().CPlusPlus11 ? 
diag::note_constexpr_ltor_non_constexpr : diag::note_constexpr_ltor_non_integral, 1) << VD << BaseType; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.CCEDiag(E); } } else { // Never allow reading a non-const value. if (Info.getLangOpts().CPlusPlus) { Info.FFDiag(E, Info.getLangOpts().CPlusPlus11 ? diag::note_constexpr_ltor_non_constexpr : diag::note_constexpr_ltor_non_integral, 1) << VD << BaseType; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E); } return CompleteObject(); } } if (!evaluateVarDeclInit(Info, E, VD, Frame, BaseVal, &LVal)) return CompleteObject(); } else if (DynamicAllocLValue DA = LVal.Base.dyn_cast()) { Optional Alloc = Info.lookupDynamicAlloc(DA); if (!Alloc) { Info.FFDiag(E, diag::note_constexpr_access_deleted_object) << AK; return CompleteObject(); } return CompleteObject(LVal.Base, &(*Alloc)->Value, LVal.Base.getDynamicAllocType()); } else { const Expr *Base = LVal.Base.dyn_cast(); if (!Frame) { if (const MaterializeTemporaryExpr *MTE = dyn_cast_or_null(Base)) { assert(MTE->getStorageDuration() == SD_Static && "should have a frame for a non-global materialized temporary"); // Per C++1y [expr.const]p2: // an lvalue-to-rvalue conversion [is not allowed unless it applies to] // - a [...] glvalue of integral or enumeration type that refers to // a non-volatile const object [...] // [...] // - a [...] glvalue of literal type that refers to a non-volatile // object whose lifetime began within the evaluation of e. // // C++11 misses the 'began within the evaluation of e' check and // instead allows all temporaries, including things like: // int &&r = 1; // int x = ++r; // constexpr int k = r; // Therefore we use the C++14 rules in C++11 too. // // Note that temporaries whose lifetimes began while evaluating a // variable's constructor are not usable while evaluating the // corresponding destructor, not even if they're of const-qualified // types. if (!(BaseType.isConstQualified() && BaseType->isIntegralOrEnumerationType()) && !lifetimeStartedInEvaluation(Info, LVal.Base)) { if (!IsAccess) return CompleteObject(LVal.getLValueBase(), nullptr, BaseType); Info.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; Info.Note(MTE->getExprLoc(), diag::note_constexpr_temporary_here); return CompleteObject(); } BaseVal = MTE->getOrCreateValue(false); assert(BaseVal && "got reference to unevaluated temporary"); } else { if (!IsAccess) return CompleteObject(LVal.getLValueBase(), nullptr, BaseType); APValue Val; LVal.moveInto(Val); Info.FFDiag(E, diag::note_constexpr_access_unreadable_object) << AK << Val.getAsString(Info.Ctx, Info.Ctx.getLValueReferenceType(LValType)); NoteLValueLocation(Info, LVal.Base); return CompleteObject(); } } else { BaseVal = Frame->getTemporary(Base, LVal.Base.getVersion()); assert(BaseVal && "missing value for temporary"); } } // In C++14, we can't safely access any mutable state when we might be // evaluating after an unmodeled side effect. // // FIXME: Not all local state is mutable. Allow local constant subobjects // to be read here (but take care with 'mutable' fields). if ((Frame && Info.getLangOpts().CPlusPlus14 && Info.EvalStatus.HasSideEffects) || (isModification(AK) && Depth < Info.SpeculativeEvaluationDepth)) return CompleteObject(); return CompleteObject(LVal.getLValueBase(), BaseVal, BaseType); } /// Perform an lvalue-to-rvalue conversion on the given glvalue. 
This /// can also be used for 'lvalue-to-lvalue' conversions for looking up the /// glvalue referred to by an entity of reference type. /// /// \param Info - Information about the ongoing evaluation. /// \param Conv - The expression for which we are performing the conversion. /// Used for diagnostics. /// \param Type - The type of the glvalue (before stripping cv-qualifiers in the /// case of a non-class type). /// \param LVal - The glvalue on which we are attempting to perform this action. /// \param RVal - The produced value will be placed here. /// \param WantObjectRepresentation - If true, we're looking for the object /// representation rather than the value, and in particular, /// there is no requirement that the result be fully initialized. static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type, const LValue &LVal, APValue &RVal, bool WantObjectRepresentation = false) { if (LVal.Designator.Invalid) return false; // Check for special cases where there is no existing APValue to look at. const Expr *Base = LVal.Base.dyn_cast(); AccessKinds AK = WantObjectRepresentation ? AK_ReadObjectRepresentation : AK_Read; if (Base && !LVal.getLValueCallIndex() && !Type.isVolatileQualified()) { if (const CompoundLiteralExpr *CLE = dyn_cast(Base)) { // In C99, a CompoundLiteralExpr is an lvalue, and we defer evaluating the // initializer until now for such expressions. Such an expression can't be // an ICE in C, so this only matters for fold. if (Type.isVolatileQualified()) { Info.FFDiag(Conv); return false; } APValue Lit; if (!Evaluate(Lit, Info, CLE->getInitializer())) return false; CompleteObject LitObj(LVal.Base, &Lit, Base->getType()); return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal, AK); } else if (isa(Base) || isa(Base)) { // Special-case character extraction so we don't have to construct an // APValue for the whole string. assert(LVal.Designator.Entries.size() <= 1 && "Can only read characters from string literals"); if (LVal.Designator.Entries.empty()) { // Fail for now for LValue to RValue conversion of an array. // (This shouldn't show up in C/C++, but it could be triggered by a // weird EvaluateAsRValue call from a tool.) Info.FFDiag(Conv); return false; } if (LVal.Designator.isOnePastTheEnd()) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(Conv, diag::note_constexpr_access_past_end) << AK; else Info.FFDiag(Conv); return false; } uint64_t CharIndex = LVal.Designator.Entries[0].getAsArrayIndex(); RVal = APValue(extractStringLiteralCharacter(Info, Base, CharIndex)); return true; } } CompleteObject Obj = findCompleteObject(Info, Conv, AK, LVal, Type); return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal, AK); } /// Perform an assignment of Val to LVal. Takes ownership of Val. static bool handleAssignment(EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, APValue &Val) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType); return Obj && modifySubobject(Info, E, Obj, LVal.Designator, Val); } namespace { struct CompoundAssignSubobjectHandler { EvalInfo &Info; const Expr *E; QualType PromotedLHSType; BinaryOperatorKind Opcode; const APValue &RHS; static const AccessKinds AccessKind = AK_Assign; typedef bool result_type; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. 
if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { switch (Subobj.getKind()) { case APValue::Int: return found(Subobj.getInt(), SubobjType); case APValue::Float: return found(Subobj.getFloat(), SubobjType); case APValue::ComplexInt: case APValue::ComplexFloat: // FIXME: Implement complex compound assignment. Info.FFDiag(E); return false; case APValue::LValue: return foundPointer(Subobj, SubobjType); case APValue::Vector: return foundVector(Subobj, SubobjType); default: // FIXME: can this happen? Info.FFDiag(E); return false; } } bool foundVector(APValue &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!SubobjType->isVectorType()) { Info.FFDiag(E); return false; } return handleVectorVectorBinOp(Info, E, Opcode, Value, RHS); } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!SubobjType->isIntegerType()) { // We don't support compound assignment on integer-cast-to-pointer // values. Info.FFDiag(E); return false; } if (RHS.isInt()) { APSInt LHS = HandleIntToIntCast(Info, E, PromotedLHSType, SubobjType, Value); if (!handleIntIntBinOp(Info, E, LHS, Opcode, RHS.getInt(), LHS)) return false; Value = HandleIntToIntCast(Info, E, SubobjType, PromotedLHSType, LHS); return true; } else if (RHS.isFloat()) { APFloat FValue(0.0); return HandleIntToFloatCast(Info, E, SubobjType, Value, PromotedLHSType, FValue) && handleFloatFloatBinOp(Info, E, FValue, Opcode, RHS.getFloat()) && HandleFloatToIntCast(Info, E, PromotedLHSType, FValue, SubobjType, Value); } Info.FFDiag(E); return false; } bool found(APFloat &Value, QualType SubobjType) { return checkConst(SubobjType) && HandleFloatToFloatCast(Info, E, SubobjType, PromotedLHSType, Value) && handleFloatFloatBinOp(Info, E, Value, Opcode, RHS.getFloat()) && HandleFloatToFloatCast(Info, E, PromotedLHSType, SubobjType, Value); } bool foundPointer(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; QualType PointeeType; if (const PointerType *PT = SubobjType->getAs()) PointeeType = PT->getPointeeType(); if (PointeeType.isNull() || !RHS.isInt() || (Opcode != BO_Add && Opcode != BO_Sub)) { Info.FFDiag(E); return false; } APSInt Offset = RHS.getInt(); if (Opcode == BO_Sub) negateAsSigned(Offset); LValue LVal; LVal.setFrom(Info.Ctx, Subobj); if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType, Offset)) return false; LVal.moveInto(Subobj); return true; } }; } // end anonymous namespace const AccessKinds CompoundAssignSubobjectHandler::AccessKind; /// Perform a compound assignment of LVal = RVal. static bool handleCompoundAssignment( EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, QualType PromotedLValType, BinaryOperatorKind Opcode, const APValue &RVal) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType); CompoundAssignSubobjectHandler Handler = { Info, E, PromotedLValType, Opcode, RVal }; return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler); } namespace { struct IncDecSubobjectHandler { EvalInfo &Info; const UnaryOperator *E; AccessKinds AccessKind; APValue *Old; typedef bool result_type; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. 
if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { // Stash the old value. Also clear Old, so we don't clobber it later // if we're post-incrementing a complex. if (Old) { *Old = Subobj; Old = nullptr; } switch (Subobj.getKind()) { case APValue::Int: return found(Subobj.getInt(), SubobjType); case APValue::Float: return found(Subobj.getFloat(), SubobjType); case APValue::ComplexInt: return found(Subobj.getComplexIntReal(), SubobjType->castAs()->getElementType() .withCVRQualifiers(SubobjType.getCVRQualifiers())); case APValue::ComplexFloat: return found(Subobj.getComplexFloatReal(), SubobjType->castAs()->getElementType() .withCVRQualifiers(SubobjType.getCVRQualifiers())); case APValue::LValue: return foundPointer(Subobj, SubobjType); default: // FIXME: can this happen? Info.FFDiag(E); return false; } } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!SubobjType->isIntegerType()) { // We don't support increment / decrement on integer-cast-to-pointer // values. Info.FFDiag(E); return false; } if (Old) *Old = APValue(Value); // bool arithmetic promotes to int, and the conversion back to bool // doesn't reduce mod 2^n, so special-case it. if (SubobjType->isBooleanType()) { if (AccessKind == AK_Increment) Value = 1; else Value = !Value; return true; } bool WasNegative = Value.isNegative(); if (AccessKind == AK_Increment) { ++Value; if (!WasNegative && Value.isNegative() && E->canOverflow()) { APSInt ActualValue(Value, /*IsUnsigned*/true); return HandleOverflow(Info, E, ActualValue, SubobjType); } } else { --Value; if (WasNegative && !Value.isNegative() && E->canOverflow()) { unsigned BitWidth = Value.getBitWidth(); APSInt ActualValue(Value.sext(BitWidth + 1), /*IsUnsigned*/false); ActualValue.setBit(BitWidth); return HandleOverflow(Info, E, ActualValue, SubobjType); } } return true; } bool found(APFloat &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (Old) *Old = APValue(Value); APFloat One(Value.getSemantics(), 1); if (AccessKind == AK_Increment) Value.add(One, APFloat::rmNearestTiesToEven); else Value.subtract(One, APFloat::rmNearestTiesToEven); return true; } bool foundPointer(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; QualType PointeeType; if (const PointerType *PT = SubobjType->getAs()) PointeeType = PT->getPointeeType(); else { Info.FFDiag(E); return false; } LValue LVal; LVal.setFrom(Info.Ctx, Subobj); if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType, AccessKind == AK_Increment ? 1 : -1)) return false; LVal.moveInto(Subobj); return true; } }; } // end anonymous namespace /// Perform an increment or decrement on LVal. static bool handleIncDec(EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, bool IsIncrement, APValue *Old) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } AccessKinds AK = IsIncrement ? AK_Increment : AK_Decrement; CompleteObject Obj = findCompleteObject(Info, E, AK, LVal, LValType); IncDecSubobjectHandler Handler = {Info, cast(E), AK, Old}; return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler); } /// Build an lvalue for the object argument of a member function call. 
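///
/// Illustrative only (not from the original source): the object argument may
/// be a glvalue, a pointer, or a prvalue of literal class type, e.g.
///
///   struct Counter { int n = 0; constexpr int get() const { return n; } };
///   constexpr Counter c;
///   static_assert(c.get() == 0, "");         // glvalue object argument
///   static_assert(Counter().get() == 0, ""); // temporary object argument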
static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object, LValue &This) { if (Object->getType()->isPointerType() && Object->isRValue()) return EvaluatePointer(Object, This, Info); if (Object->isGLValue()) return EvaluateLValue(Object, This, Info); if (Object->getType()->isLiteralType(Info.Ctx)) return EvaluateTemporary(Object, This, Info); Info.FFDiag(Object, diag::note_constexpr_nonliteral) << Object->getType(); return false; } /// HandleMemberPointerAccess - Evaluate a member access operation and build an /// lvalue referring to the result. /// /// \param Info - Information about the ongoing evaluation. /// \param LV - An lvalue referring to the base of the member pointer. /// \param RHS - The member pointer expression. /// \param IncludeMember - Specifies whether the member itself is included in /// the resulting LValue subobject designator. This is not possible when /// creating a bound member function. /// \return The field or method declaration to which the member pointer refers, /// or 0 if evaluation fails. static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info, QualType LVType, LValue &LV, const Expr *RHS, bool IncludeMember = true) { MemberPtr MemPtr; if (!EvaluateMemberPointer(RHS, MemPtr, Info)) return nullptr; // C++11 [expr.mptr.oper]p6: If the second operand is the null pointer to // member value, the behavior is undefined. if (!MemPtr.getDecl()) { // FIXME: Specific diagnostic. Info.FFDiag(RHS); return nullptr; } if (MemPtr.isDerivedMember()) { // This is a member of some derived class. Truncate LV appropriately. // The end of the derived-to-base path for the base object must match the // derived-to-base path for the member pointer. if (LV.Designator.MostDerivedPathLength + MemPtr.Path.size() > LV.Designator.Entries.size()) { Info.FFDiag(RHS); return nullptr; } unsigned PathLengthToMember = LV.Designator.Entries.size() - MemPtr.Path.size(); for (unsigned I = 0, N = MemPtr.Path.size(); I != N; ++I) { const CXXRecordDecl *LVDecl = getAsBaseClass( LV.Designator.Entries[PathLengthToMember + I]); const CXXRecordDecl *MPDecl = MemPtr.Path[I]; if (LVDecl->getCanonicalDecl() != MPDecl->getCanonicalDecl()) { Info.FFDiag(RHS); return nullptr; } } // Truncate the lvalue to the appropriate derived class. if (!CastToDerivedClass(Info, RHS, LV, MemPtr.getContainingRecord(), PathLengthToMember)) return nullptr; } else if (!MemPtr.Path.empty()) { // Extend the LValue path with the member pointer's path. LV.Designator.Entries.reserve(LV.Designator.Entries.size() + MemPtr.Path.size() + IncludeMember); // Walk down to the appropriate base class. if (const PointerType *PT = LVType->getAs()) LVType = PT->getPointeeType(); const CXXRecordDecl *RD = LVType->getAsCXXRecordDecl(); assert(RD && "member pointer access on non-class-type expression"); // The first class in the path is that of the lvalue. for (unsigned I = 1, N = MemPtr.Path.size(); I != N; ++I) { const CXXRecordDecl *Base = MemPtr.Path[N - I - 1]; if (!HandleLValueDirectBase(Info, RHS, LV, RD, Base)) return nullptr; RD = Base; } // Finally cast to the class containing the member. if (!HandleLValueDirectBase(Info, RHS, LV, RD, MemPtr.getContainingRecord())) return nullptr; } // Add the member. Note that we cannot build bound member functions here. 
  if (IncludeMember) {
    if (const FieldDecl *FD = dyn_cast<FieldDecl>(MemPtr.getDecl())) {
      if (!HandleLValueMember(Info, RHS, LV, FD))
        return nullptr;
    } else if (const IndirectFieldDecl *IFD =
                 dyn_cast<IndirectFieldDecl>(MemPtr.getDecl())) {
      if (!HandleLValueIndirectMember(Info, RHS, LV, IFD))
        return nullptr;
    } else {
      llvm_unreachable("can't construct reference to bound member function");
    }
  }

  return MemPtr.getDecl();
}

static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
                                                  const BinaryOperator *BO,
                                                  LValue &LV,
                                                  bool IncludeMember = true) {
  assert(BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI);

  if (!EvaluateObjectArgument(Info, BO->getLHS(), LV)) {
    if (Info.noteFailure()) {
      MemberPtr MemPtr;
      EvaluateMemberPointer(BO->getRHS(), MemPtr, Info);
    }
    return nullptr;
  }

  return HandleMemberPointerAccess(Info, BO->getLHS()->getType(), LV,
                                   BO->getRHS(), IncludeMember);
}

/// HandleBaseToDerivedCast - Apply the given base-to-derived cast operation on
/// the provided lvalue, which currently refers to the base object.
static bool HandleBaseToDerivedCast(EvalInfo &Info, const CastExpr *E,
                                    LValue &Result) {
  SubobjectDesignator &D = Result.Designator;
  if (D.Invalid || !Result.checkNullPointer(Info, E, CSK_Derived))
    return false;

  QualType TargetQT = E->getType();
  if (const PointerType *PT = TargetQT->getAs<PointerType>())
    TargetQT = PT->getPointeeType();

  // Check this cast lands within the final derived-to-base subobject path.
  if (D.MostDerivedPathLength + E->path_size() > D.Entries.size()) {
    Info.CCEDiag(E, diag::note_constexpr_invalid_downcast)
      << D.MostDerivedType << TargetQT;
    return false;
  }

  // Check the type of the final cast. We don't need to check the path,
  // since a cast can only be formed if the path is unique.
  unsigned NewEntriesSize = D.Entries.size() - E->path_size();
  const CXXRecordDecl *TargetType = TargetQT->getAsCXXRecordDecl();
  const CXXRecordDecl *FinalType;
  if (NewEntriesSize == D.MostDerivedPathLength)
    FinalType = D.MostDerivedType->getAsCXXRecordDecl();
  else
    FinalType = getAsBaseClass(D.Entries[NewEntriesSize - 1]);
  if (FinalType->getCanonicalDecl() != TargetType->getCanonicalDecl()) {
    Info.CCEDiag(E, diag::note_constexpr_invalid_downcast)
      << D.MostDerivedType << TargetQT;
    return false;
  }

  // Truncate the lvalue to the appropriate derived class.
  return CastToDerivedClass(Info, E, Result, TargetType, NewEntriesSize);
}

/// Get the value to use for a default-initialized object of type T.
/// Return false if it encounters something invalid.
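///
/// A hedged sketch (not in the original source) of the behaviour this models:
/// scalar subobjects that are only default-initialized get an indeterminate
/// value, which may be overwritten but not read. The example assumes C++20,
/// where uninitialized locals are allowed in constexpr functions:
///
///   constexpr int f() {
///     int n;          // indeterminate value
///     n = 5;          // OK: assignment gives it a value
///     return n;
///   }
///   static_assert(f() == 5);
///   // constexpr int g() { int n; return n; }  // error: read of uninitialized value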
static bool getDefaultInitValue(QualType T, APValue &Result) {
  bool Success = true;
  if (auto *RD = T->getAsCXXRecordDecl()) {
    if (RD->isInvalidDecl()) {
      Result = APValue();
      return false;
    }
    if (RD->isUnion()) {
      Result = APValue((const FieldDecl *)nullptr);
      return true;
    }
    Result = APValue(APValue::UninitStruct(), RD->getNumBases(),
                     std::distance(RD->field_begin(), RD->field_end()));

    unsigned Index = 0;
    for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(),
                                                  End = RD->bases_end();
         I != End; ++I, ++Index)
      Success &= getDefaultInitValue(I->getType(), Result.getStructBase(Index));

    for (const auto *I : RD->fields()) {
      if (I->isUnnamedBitfield())
        continue;
      Success &= getDefaultInitValue(I->getType(),
                                     Result.getStructField(I->getFieldIndex()));
    }
    return Success;
  }

  if (auto *AT =
          dyn_cast_or_null<ConstantArrayType>(T->getAsArrayTypeUnsafe())) {
    Result = APValue(APValue::UninitArray(), 0, AT->getSize().getZExtValue());
    if (Result.hasArrayFiller())
      Success &=
          getDefaultInitValue(AT->getElementType(), Result.getArrayFiller());

    return Success;
  }

  Result = APValue::IndeterminateValue();
  return true;
}

namespace {
enum EvalStmtResult {
  /// Evaluation failed.
  ESR_Failed,
  /// Hit a 'return' statement.
  ESR_Returned,
  /// Evaluation succeeded.
  ESR_Succeeded,
  /// Hit a 'continue' statement.
  ESR_Continue,
  /// Hit a 'break' statement.
  ESR_Break,
  /// Still scanning for 'case' or 'default' statement.
  ESR_CaseNotFound
};
}

static bool EvaluateVarDecl(EvalInfo &Info, const VarDecl *VD) {
  // We don't need to evaluate the initializer for a static local.
  if (!VD->hasLocalStorage())
    return true;

  LValue Result;
  APValue &Val =
      Info.CurrentCall->createTemporary(VD, VD->getType(), true, Result);

  const Expr *InitE = VD->getInit();
  if (!InitE)
    return getDefaultInitValue(VD->getType(), Val);

  if (InitE->isValueDependent())
    return false;

  if (!EvaluateInPlace(Val, Info, Result, InitE)) {
    // Wipe out any partially-computed value, to allow tracking that this
    // evaluation failed.
    Val = APValue();
    return false;
  }

  return true;
}

static bool EvaluateDecl(EvalInfo &Info, const Decl *D) {
  bool OK = true;

  if (const VarDecl *VD = dyn_cast<VarDecl>(D))
    OK &= EvaluateVarDecl(Info, VD);

  if (const DecompositionDecl *DD = dyn_cast<DecompositionDecl>(D))
    for (auto *BD : DD->bindings())
      if (auto *VD = BD->getHoldingVar())
        OK &= EvaluateDecl(Info, VD);

  return OK;
}

/// Evaluate a condition (either a variable declaration or an expression).
static bool EvaluateCond(EvalInfo &Info, const VarDecl *CondDecl,
                         const Expr *Cond, bool &Result) {
  FullExpressionRAII Scope(Info);
  if (CondDecl && !EvaluateDecl(Info, CondDecl))
    return false;
  if (!EvaluateAsBooleanCondition(Cond, Result, Info))
    return false;
  return Scope.destroy();
}

namespace {
/// A location where the result (returned value) of evaluating a
/// statement should be stored.
struct StmtResult {
  /// The APValue that should be filled in with the returned value.
  APValue &Value;
  /// The location containing the result, if any (used to support RVO).
  const LValue *Slot;
};

struct TempVersionRAII {
  CallStackFrame &Frame;

  TempVersionRAII(CallStackFrame &Frame) : Frame(Frame) {
    Frame.pushTempVersion();
  }

  ~TempVersionRAII() {
    Frame.popTempVersion();
  }
};

}

static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
                                   const Stmt *S,
                                   const SwitchCase *SC = nullptr);

/// Evaluate the body of a loop, and translate the result as appropriate.
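///
/// For illustration only (not part of the original source): mapping 'break'
/// to ESR_Succeeded and 'continue' to ESR_Continue is what makes C++14 loops
/// like this evaluate.
///
///   constexpr int countEven(int n) {
///     int c = 0;
///     for (int i = 0; i != n; ++i) {
///       if (i % 2) continue;   // skip odd values, go to the next iteration
///       if (c == 3) break;     // terminates the loop, not the evaluation
///       ++c;
///     }
///     return c;
///   }
///   static_assert(countEven(10) == 3, "");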
static EvalStmtResult EvaluateLoopBody(StmtResult &Result, EvalInfo &Info, const Stmt *Body, const SwitchCase *Case = nullptr) { BlockScopeRAII Scope(Info); EvalStmtResult ESR = EvaluateStmt(Result, Info, Body, Case); if (ESR != ESR_Failed && ESR != ESR_CaseNotFound && !Scope.destroy()) ESR = ESR_Failed; switch (ESR) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: case ESR_Continue: return ESR_Continue; case ESR_Failed: case ESR_Returned: case ESR_CaseNotFound: return ESR; } llvm_unreachable("Invalid EvalStmtResult!"); } /// Evaluate a switch statement. static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, const SwitchStmt *SS) { BlockScopeRAII Scope(Info); // Evaluate the switch condition. APSInt Value; { if (const Stmt *Init = SS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) ESR = ESR_Failed; return ESR; } } FullExpressionRAII CondScope(Info); if (SS->getConditionVariable() && !EvaluateDecl(Info, SS->getConditionVariable())) return ESR_Failed; if (!EvaluateInteger(SS->getCond(), Value, Info)) return ESR_Failed; if (!CondScope.destroy()) return ESR_Failed; } // Find the switch case corresponding to the value of the condition. // FIXME: Cache this lookup. const SwitchCase *Found = nullptr; for (const SwitchCase *SC = SS->getSwitchCaseList(); SC; SC = SC->getNextSwitchCase()) { if (isa(SC)) { Found = SC; continue; } const CaseStmt *CS = cast(SC); APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx); APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx) : LHS; if (LHS <= Value && Value <= RHS) { Found = SC; break; } } if (!Found) return Scope.destroy() ? ESR_Succeeded : ESR_Failed; // Search the switch body for the switch case and evaluate it from there. EvalStmtResult ESR = EvaluateStmt(Result, Info, SS->getBody(), Found); if (ESR != ESR_Failed && ESR != ESR_CaseNotFound && !Scope.destroy()) return ESR_Failed; switch (ESR) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: case ESR_Continue: case ESR_Failed: case ESR_Returned: return ESR; case ESR_CaseNotFound: // This can only happen if the switch case is nested within a statement // expression. We have no intention of supporting that. Info.FFDiag(Found->getBeginLoc(), diag::note_constexpr_stmt_expr_unsupported); return ESR_Failed; } llvm_unreachable("Invalid EvalStmtResult!"); } // Evaluate a statement. static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, const Stmt *S, const SwitchCase *Case) { if (!Info.nextStep(S)) return ESR_Failed; // If we're hunting down a 'case' or 'default' label, recurse through // substatements until we hit the label. if (Case) { switch (S->getStmtClass()) { case Stmt::CompoundStmtClass: // FIXME: Precompute which substatement of a compound statement we // would jump to, and go straight there rather than performing a // linear scan each time. case Stmt::LabelStmtClass: case Stmt::AttributedStmtClass: case Stmt::DoStmtClass: break; case Stmt::CaseStmtClass: case Stmt::DefaultStmtClass: if (Case == S) Case = nullptr; break; case Stmt::IfStmtClass: { // FIXME: Precompute which side of an 'if' we would jump to, and go // straight there rather than scanning both sides. const IfStmt *IS = cast(S); // Wrap the evaluation in a block scope, in case it's a DeclStmt // preceded by our switch label. BlockScopeRAII Scope(Info); // Step into the init statement in case it brings an (uninitialized) // variable into scope. 
if (const Stmt *Init = IS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init, Case); if (ESR != ESR_CaseNotFound) { assert(ESR != ESR_Succeeded); return ESR; } } // Condition variable must be initialized if it exists. // FIXME: We can skip evaluating the body if there's a condition // variable, as there can't be any case labels within it. // (The same is true for 'for' statements.) EvalStmtResult ESR = EvaluateStmt(Result, Info, IS->getThen(), Case); if (ESR == ESR_Failed) return ESR; if (ESR != ESR_CaseNotFound) return Scope.destroy() ? ESR : ESR_Failed; if (!IS->getElse()) return ESR_CaseNotFound; ESR = EvaluateStmt(Result, Info, IS->getElse(), Case); if (ESR == ESR_Failed) return ESR; if (ESR != ESR_CaseNotFound) return Scope.destroy() ? ESR : ESR_Failed; return ESR_CaseNotFound; } case Stmt::WhileStmtClass: { EvalStmtResult ESR = EvaluateLoopBody(Result, Info, cast(S)->getBody(), Case); if (ESR != ESR_Continue) return ESR; break; } case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); BlockScopeRAII Scope(Info); // Step into the init statement in case it brings an (uninitialized) // variable into scope. if (const Stmt *Init = FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init, Case); if (ESR != ESR_CaseNotFound) { assert(ESR != ESR_Succeeded); return ESR; } } EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody(), Case); if (ESR != ESR_Continue) return ESR; if (FS->getInc()) { FullExpressionRAII IncScope(Info); if (!EvaluateIgnoredValue(Info, FS->getInc()) || !IncScope.destroy()) return ESR_Failed; } break; } case Stmt::DeclStmtClass: { // Start the lifetime of any uninitialized variables we encounter. They // might be used by the selected branch of the switch. const DeclStmt *DS = cast(S); for (const auto *D : DS->decls()) { if (const auto *VD = dyn_cast(D)) { if (VD->hasLocalStorage() && !VD->getInit()) if (!EvaluateVarDecl(Info, VD)) return ESR_Failed; // FIXME: If the variable has initialization that can't be jumped // over, bail out of any immediately-surrounding compound-statement // too. There can't be any case labels here. } } return ESR_CaseNotFound; } default: return ESR_CaseNotFound; } } switch (S->getStmtClass()) { default: if (const Expr *E = dyn_cast(S)) { // Don't bother evaluating beyond an expression-statement which couldn't // be evaluated. // FIXME: Do we need the FullExpressionRAII object here? // VisitExprWithCleanups should create one when necessary. FullExpressionRAII Scope(Info); if (!EvaluateIgnoredValue(Info, E) || !Scope.destroy()) return ESR_Failed; return ESR_Succeeded; } Info.FFDiag(S->getBeginLoc()); return ESR_Failed; case Stmt::NullStmtClass: return ESR_Succeeded; case Stmt::DeclStmtClass: { const DeclStmt *DS = cast(S); for (const auto *D : DS->decls()) { // Each declaration initialization is its own full-expression. FullExpressionRAII Scope(Info); if (!EvaluateDecl(Info, D) && !Info.noteFailure()) return ESR_Failed; if (!Scope.destroy()) return ESR_Failed; } return ESR_Succeeded; } case Stmt::ReturnStmtClass: { const Expr *RetExpr = cast(S)->getRetValue(); FullExpressionRAII Scope(Info); if (RetExpr && !(Result.Slot ? EvaluateInPlace(Result.Value, Info, *Result.Slot, RetExpr) : Evaluate(Result.Value, Info, RetExpr))) return ESR_Failed; return Scope.destroy() ? 
ESR_Returned : ESR_Failed; } case Stmt::CompoundStmtClass: { BlockScopeRAII Scope(Info); const CompoundStmt *CS = cast(S); for (const auto *BI : CS->body()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, BI, Case); if (ESR == ESR_Succeeded) Case = nullptr; else if (ESR != ESR_CaseNotFound) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } } if (Case) return ESR_CaseNotFound; return Scope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::IfStmtClass: { const IfStmt *IS = cast(S); // Evaluate the condition, as either a var decl or as an expression. BlockScopeRAII Scope(Info); if (const Stmt *Init = IS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } } bool Cond; if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond)) return ESR_Failed; if (const Stmt *SubStmt = Cond ? IS->getThen() : IS->getElse()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, SubStmt); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } } return Scope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::WhileStmtClass: { const WhileStmt *WS = cast(S); while (true) { BlockScopeRAII Scope(Info); bool Continue; if (!EvaluateCond(Info, WS->getConditionVariable(), WS->getCond(), Continue)) return ESR_Failed; if (!Continue) break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, WS->getBody()); if (ESR != ESR_Continue) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } if (!Scope.destroy()) return ESR_Failed; } return ESR_Succeeded; } case Stmt::DoStmtClass: { const DoStmt *DS = cast(S); bool Continue; do { EvalStmtResult ESR = EvaluateLoopBody(Result, Info, DS->getBody(), Case); if (ESR != ESR_Continue) return ESR; Case = nullptr; FullExpressionRAII CondScope(Info); if (!EvaluateAsBooleanCondition(DS->getCond(), Continue, Info) || !CondScope.destroy()) return ESR_Failed; } while (Continue); return ESR_Succeeded; } case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); BlockScopeRAII ForScope(Info); if (FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getInit()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !ForScope.destroy()) return ESR_Failed; return ESR; } } while (true) { BlockScopeRAII IterScope(Info); bool Continue = true; if (FS->getCond() && !EvaluateCond(Info, FS->getConditionVariable(), FS->getCond(), Continue)) return ESR_Failed; if (!Continue) break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody()); if (ESR != ESR_Continue) { if (ESR != ESR_Failed && (!IterScope.destroy() || !ForScope.destroy())) return ESR_Failed; return ESR; } if (FS->getInc()) { FullExpressionRAII IncScope(Info); if (!EvaluateIgnoredValue(Info, FS->getInc()) || !IncScope.destroy()) return ESR_Failed; } if (!IterScope.destroy()) return ESR_Failed; } return ForScope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::CXXForRangeStmtClass: { const CXXForRangeStmt *FS = cast(S); BlockScopeRAII Scope(Info); // Evaluate the init-statement if present. if (FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getInit()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } } // Initialize the __range variable. 
EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getRangeStmt()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } // Create the __begin and __end iterators. ESR = EvaluateStmt(Result, Info, FS->getBeginStmt()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } ESR = EvaluateStmt(Result, Info, FS->getEndStmt()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && !Scope.destroy()) return ESR_Failed; return ESR; } while (true) { // Condition: __begin != __end. { bool Continue = true; FullExpressionRAII CondExpr(Info); if (!EvaluateAsBooleanCondition(FS->getCond(), Continue, Info)) return ESR_Failed; if (!Continue) break; } // User's variable declaration, initialized by *__begin. BlockScopeRAII InnerScope(Info); ESR = EvaluateStmt(Result, Info, FS->getLoopVarStmt()); if (ESR != ESR_Succeeded) { if (ESR != ESR_Failed && (!InnerScope.destroy() || !Scope.destroy())) return ESR_Failed; return ESR; } // Loop body. ESR = EvaluateLoopBody(Result, Info, FS->getBody()); if (ESR != ESR_Continue) { if (ESR != ESR_Failed && (!InnerScope.destroy() || !Scope.destroy())) return ESR_Failed; return ESR; } // Increment: ++__begin if (!EvaluateIgnoredValue(Info, FS->getInc())) return ESR_Failed; if (!InnerScope.destroy()) return ESR_Failed; } return Scope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::SwitchStmtClass: return EvaluateSwitch(Result, Info, cast(S)); case Stmt::ContinueStmtClass: return ESR_Continue; case Stmt::BreakStmtClass: return ESR_Break; case Stmt::LabelStmtClass: return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); case Stmt::AttributedStmtClass: // As a general principle, C++11 attributes can be ignored without // any semantic impact. return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); case Stmt::CaseStmtClass: case Stmt::DefaultStmtClass: return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); case Stmt::CXXTryStmtClass: // Evaluate try blocks by evaluating all sub statements. return EvaluateStmt(Result, Info, cast(S)->getTryBlock(), Case); } } /// CheckTrivialDefaultConstructor - Check whether a constructor is a trivial /// default constructor. If so, we'll fold it whether or not it's marked as /// constexpr. If it is marked as constexpr, we will never implicitly define it, /// so we need special handling. static bool CheckTrivialDefaultConstructor(EvalInfo &Info, SourceLocation Loc, const CXXConstructorDecl *CD, bool IsValueInitialization) { if (!CD->isTrivial() || !CD->isDefaultConstructor()) return false; // Value-initialization does not call a trivial default constructor, so such a // call is a core constant expression whether or not the constructor is // constexpr. if (!CD->isConstexpr() && !IsValueInitialization) { if (Info.getLangOpts().CPlusPlus11) { // FIXME: If DiagDecl is an implicitly-declared special member function, // we should be much more explicit about why it's not constexpr. Info.CCEDiag(Loc, diag::note_constexpr_invalid_function, 1) << /*IsConstexpr*/0 << /*IsConstructor*/1 << CD; Info.Note(CD->getLocation(), diag::note_declared_at); } else { Info.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); } } return true; } /// CheckConstexprFunction - Check that a function can be called in a constant /// expression. 
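///
/// Illustrative example (not from the original source): a call to a function
/// that is not constexpr is only diagnosed if the call is actually reached
/// during evaluation.
///
///   int runtimeOnly();                          // not constexpr
///   constexpr int f(bool b) { return b ? 1 : runtimeOnly(); }
///   constexpr int a = f(true);    // OK: the non-constexpr call is never evaluated
///   // constexpr int b = f(false); // error: call to non-constexpr 'runtimeOnly'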
static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Declaration, const FunctionDecl *Definition, const Stmt *Body) { // Potential constant expressions can contain calls to declared, but not yet // defined, constexpr functions. if (Info.checkingPotentialConstantExpression() && !Definition && Declaration->isConstexpr()) return false; // Bail out if the function declaration itself is invalid. We will // have produced a relevant diagnostic while parsing it, so just // note the problematic sub-expression. if (Declaration->isInvalidDecl()) { Info.FFDiag(CallLoc, diag::note_invalid_subexpr_in_const_expr); return false; } // DR1872: An instantiated virtual constexpr function can't be called in a // constant expression (prior to C++20). We can still constant-fold such a // call. if (!Info.Ctx.getLangOpts().CPlusPlus20 && isa(Declaration) && cast(Declaration)->isVirtual()) Info.CCEDiag(CallLoc, diag::note_constexpr_virtual_call); if (Definition && Definition->isInvalidDecl()) { Info.FFDiag(CallLoc, diag::note_invalid_subexpr_in_const_expr); return false; } if (const auto *CtorDecl = dyn_cast_or_null(Definition)) { for (const auto *InitExpr : CtorDecl->inits()) { if (InitExpr->getInit() && InitExpr->getInit()->containsErrors()) return false; } } // Can we evaluate this function call? if (Definition && Definition->isConstexpr() && Body) return true; if (Info.getLangOpts().CPlusPlus11) { const FunctionDecl *DiagDecl = Definition ? Definition : Declaration; // If this function is not constexpr because it is an inherited // non-constexpr constructor, diagnose that directly. auto *CD = dyn_cast(DiagDecl); if (CD && CD->isInheritingConstructor()) { auto *Inherited = CD->getInheritedConstructor().getConstructor(); if (!Inherited->isConstexpr()) DiagDecl = CD = Inherited; } // FIXME: If DiagDecl is an implicitly-declared special member function // or an inheriting constructor, we should be much more explicit about why // it's not constexpr. if (CD && CD->isInheritingConstructor()) Info.FFDiag(CallLoc, diag::note_constexpr_invalid_inhctor, 1) << CD->getInheritedConstructor().getConstructor()->getParent(); else Info.FFDiag(CallLoc, diag::note_constexpr_invalid_function, 1) << DiagDecl->isConstexpr() << (bool)CD << DiagDecl; Info.Note(DiagDecl->getLocation(), diag::note_declared_at); } else { Info.FFDiag(CallLoc, diag::note_invalid_subexpr_in_const_expr); } return false; } namespace { struct CheckDynamicTypeHandler { AccessKinds AccessKind; typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { return true; } bool found(APSInt &Value, QualType SubobjType) { return true; } bool found(APFloat &Value, QualType SubobjType) { return true; } }; } // end anonymous namespace /// Check that we can access the notional vptr of an object / determine its /// dynamic type. static bool checkDynamicType(EvalInfo &Info, const Expr *E, const LValue &This, AccessKinds AK, bool Polymorphic) { if (This.Designator.Invalid) return false; CompleteObject Obj = findCompleteObject(Info, E, AK, This, QualType()); if (!Obj) return false; if (!Obj.Value) { // The object is not usable in constant expressions, so we can't inspect // its value to see if it's in-lifetime or what the active union members // are. We can still check for a one-past-the-end lvalue. if (This.Designator.isOnePastTheEnd() || This.Designator.isMostDerivedAnUnsizedArray()) { Info.FFDiag(E, This.Designator.isOnePastTheEnd() ? 
diag::note_constexpr_access_past_end : diag::note_constexpr_access_unsized_array) << AK; return false; } else if (Polymorphic) { // Conservatively refuse to perform a polymorphic operation if we would // not be able to read a notional 'vptr' value. APValue Val; This.moveInto(Val); QualType StarThisType = Info.Ctx.getLValueReferenceType(This.Designator.getType(Info.Ctx)); Info.FFDiag(E, diag::note_constexpr_polymorphic_unknown_dynamic_type) << AK << Val.getAsString(Info.Ctx, StarThisType); return false; } return true; } CheckDynamicTypeHandler Handler{AK}; return Obj && findSubobject(Info, E, Obj, This.Designator, Handler); } /// Check that the pointee of the 'this' pointer in a member function call is /// either within its lifetime or in its period of construction or destruction. static bool checkNonVirtualMemberCallThisPointer(EvalInfo &Info, const Expr *E, const LValue &This, const CXXMethodDecl *NamedMember) { return checkDynamicType( Info, E, This, isa(NamedMember) ? AK_Destroy : AK_MemberCall, false); } struct DynamicType { /// The dynamic class type of the object. const CXXRecordDecl *Type; /// The corresponding path length in the lvalue. unsigned PathLength; }; static const CXXRecordDecl *getBaseClassType(SubobjectDesignator &Designator, unsigned PathLength) { assert(PathLength >= Designator.MostDerivedPathLength && PathLength <= Designator.Entries.size() && "invalid path length"); return (PathLength == Designator.MostDerivedPathLength) ? Designator.MostDerivedType->getAsCXXRecordDecl() : getAsBaseClass(Designator.Entries[PathLength - 1]); } /// Determine the dynamic type of an object. static Optional ComputeDynamicType(EvalInfo &Info, const Expr *E, LValue &This, AccessKinds AK) { // If we don't have an lvalue denoting an object of class type, there is no // meaningful dynamic type. (We consider objects of non-class type to have no // dynamic type.) if (!checkDynamicType(Info, E, This, AK, true)) return None; // Refuse to compute a dynamic type in the presence of virtual bases. This // shouldn't happen other than in constant-folding situations, since literal // types can't have virtual bases. // // Note that consumers of DynamicType assume that the type has no virtual // bases, and will need modifications if this restriction is relaxed. const CXXRecordDecl *Class = This.Designator.MostDerivedType->getAsCXXRecordDecl(); if (!Class || Class->getNumVBases()) { Info.FFDiag(E); return None; } // FIXME: For very deep class hierarchies, it might be beneficial to use a // binary search here instead. But the overwhelmingly common case is that // we're not in the middle of a constructor, so it probably doesn't matter // in practice. ArrayRef Path = This.Designator.Entries; for (unsigned PathLength = This.Designator.MostDerivedPathLength; PathLength <= Path.size(); ++PathLength) { switch (Info.isEvaluatingCtorDtor(This.getLValueBase(), Path.slice(0, PathLength))) { case ConstructionPhase::Bases: case ConstructionPhase::DestroyingBases: // We're constructing or destroying a base class. This is not the dynamic // type. break; case ConstructionPhase::None: case ConstructionPhase::AfterBases: case ConstructionPhase::AfterFields: case ConstructionPhase::Destroying: // We've finished constructing the base classes and not yet started // destroying them again, so this is the dynamic type. 
return DynamicType{getBaseClassType(This.Designator, PathLength), PathLength}; } } // CWG issue 1517: we're constructing a base class of the object described by // 'This', so that object has not yet begun its period of construction and // any polymorphic operation on it results in undefined behavior. Info.FFDiag(E); return None; } /// Perform virtual dispatch. static const CXXMethodDecl *HandleVirtualDispatch( EvalInfo &Info, const Expr *E, LValue &This, const CXXMethodDecl *Found, llvm::SmallVectorImpl &CovariantAdjustmentPath) { Optional DynType = ComputeDynamicType( Info, E, This, isa(Found) ? AK_Destroy : AK_MemberCall); if (!DynType) return nullptr; // Find the final overrider. It must be declared in one of the classes on the // path from the dynamic type to the static type. // FIXME: If we ever allow literal types to have virtual base classes, that // won't be true. const CXXMethodDecl *Callee = Found; unsigned PathLength = DynType->PathLength; for (/**/; PathLength <= This.Designator.Entries.size(); ++PathLength) { const CXXRecordDecl *Class = getBaseClassType(This.Designator, PathLength); const CXXMethodDecl *Overrider = Found->getCorrespondingMethodDeclaredInClass(Class, false); if (Overrider) { Callee = Overrider; break; } } // C++2a [class.abstract]p6: // the effect of making a virtual call to a pure virtual function [...] is // undefined if (Callee->isPure()) { Info.FFDiag(E, diag::note_constexpr_pure_virtual_call, 1) << Callee; Info.Note(Callee->getLocation(), diag::note_declared_at); return nullptr; } // If necessary, walk the rest of the path to determine the sequence of // covariant adjustment steps to apply. if (!Info.Ctx.hasSameUnqualifiedType(Callee->getReturnType(), Found->getReturnType())) { CovariantAdjustmentPath.push_back(Callee->getReturnType()); for (unsigned CovariantPathLength = PathLength + 1; CovariantPathLength != This.Designator.Entries.size(); ++CovariantPathLength) { const CXXRecordDecl *NextClass = getBaseClassType(This.Designator, CovariantPathLength); const CXXMethodDecl *Next = Found->getCorrespondingMethodDeclaredInClass(NextClass, false); if (Next && !Info.Ctx.hasSameUnqualifiedType( Next->getReturnType(), CovariantAdjustmentPath.back())) CovariantAdjustmentPath.push_back(Next->getReturnType()); } if (!Info.Ctx.hasSameUnqualifiedType(Found->getReturnType(), CovariantAdjustmentPath.back())) CovariantAdjustmentPath.push_back(Found->getReturnType()); } // Perform 'this' adjustment. if (!CastToDerivedClass(Info, E, This, Callee->getParent(), PathLength)) return nullptr; return Callee; } /// Perform the adjustment from a value returned by a virtual function to /// a value of the statically expected type, which may be a pointer or /// reference to a base class of the returned type. 
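///
/// A minimal sketch (not in the original source; assumes C++20 constexpr
/// virtual dispatch) of when such an adjustment is needed:
///
///   struct Base { virtual constexpr const Base *self() const { return this; } };
///   struct Derived : Base {
///     constexpr const Derived *self() const override { return this; }
///   };
///   constexpr Derived d;
///   constexpr const Base &b = d;
///   static_assert(b.self() == &d);  // Derived* result adjusted back to Base*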
static bool HandleCovariantReturnAdjustment(EvalInfo &Info, const Expr *E, APValue &Result, ArrayRef Path) { assert(Result.isLValue() && "unexpected kind of APValue for covariant return"); if (Result.isNullPointer()) return true; LValue LVal; LVal.setFrom(Info.Ctx, Result); const CXXRecordDecl *OldClass = Path[0]->getPointeeCXXRecordDecl(); for (unsigned I = 1; I != Path.size(); ++I) { const CXXRecordDecl *NewClass = Path[I]->getPointeeCXXRecordDecl(); assert(OldClass && NewClass && "unexpected kind of covariant return"); if (OldClass != NewClass && !CastToBaseClass(Info, E, LVal, OldClass, NewClass)) return false; OldClass = NewClass; } LVal.moveInto(Result); return true; } /// Determine whether \p Base, which is known to be a direct base class of /// \p Derived, is a public base class. static bool isBaseClassPublic(const CXXRecordDecl *Derived, const CXXRecordDecl *Base) { for (const CXXBaseSpecifier &BaseSpec : Derived->bases()) { auto *BaseClass = BaseSpec.getType()->getAsCXXRecordDecl(); if (BaseClass && declaresSameEntity(BaseClass, Base)) return BaseSpec.getAccessSpecifier() == AS_public; } llvm_unreachable("Base is not a direct base of Derived"); } /// Apply the given dynamic cast operation on the provided lvalue. /// /// This implements the hard case of dynamic_cast, requiring a "runtime check" /// to find a suitable target subobject. static bool HandleDynamicCast(EvalInfo &Info, const ExplicitCastExpr *E, LValue &Ptr) { // We can't do anything with a non-symbolic pointer value. SubobjectDesignator &D = Ptr.Designator; if (D.Invalid) return false; // C++ [expr.dynamic.cast]p6: // If v is a null pointer value, the result is a null pointer value. if (Ptr.isNullPointer() && !E->isGLValue()) return true; // For all the other cases, we need the pointer to point to an object within // its lifetime / period of construction / destruction, and we need to know // its dynamic type. Optional DynType = ComputeDynamicType(Info, E, Ptr, AK_DynamicCast); if (!DynType) return false; // C++ [expr.dynamic.cast]p7: // If T is "pointer to cv void", then the result is a pointer to the most // derived object if (E->getType()->isVoidPointerType()) return CastToDerivedClass(Info, E, Ptr, DynType->Type, DynType->PathLength); const CXXRecordDecl *C = E->getTypeAsWritten()->getPointeeCXXRecordDecl(); assert(C && "dynamic_cast target is not void pointer nor class"); CanQualType CQT = Info.Ctx.getCanonicalType(Info.Ctx.getRecordType(C)); auto RuntimeCheckFailed = [&] (CXXBasePaths *Paths) { // C++ [expr.dynamic.cast]p9: if (!E->isGLValue()) { // The value of a failed cast to pointer type is the null pointer value // of the required result type. Ptr.setNull(Info.Ctx, E->getType()); return true; } // A failed cast to reference type throws [...] std::bad_cast. unsigned DiagKind; if (!Paths && (declaresSameEntity(DynType->Type, C) || DynType->Type->isDerivedFrom(C))) DiagKind = 0; else if (!Paths || Paths->begin() == Paths->end()) DiagKind = 1; else if (Paths->isAmbiguous(CQT)) DiagKind = 2; else { assert(Paths->front().Access != AS_public && "why did the cast fail?"); DiagKind = 3; } Info.FFDiag(E, diag::note_constexpr_dynamic_cast_to_reference_failed) << DiagKind << Ptr.Designator.getType(Info.Ctx) << Info.Ctx.getRecordType(DynType->Type) << E->getType().getUnqualifiedType(); return false; }; // Runtime check, phase 1: // Walk from the base subobject towards the derived object looking for the // target type. 
for (int PathLength = Ptr.Designator.Entries.size(); PathLength >= (int)DynType->PathLength; --PathLength) { const CXXRecordDecl *Class = getBaseClassType(Ptr.Designator, PathLength); if (declaresSameEntity(Class, C)) return CastToDerivedClass(Info, E, Ptr, Class, PathLength); // We can only walk across public inheritance edges. if (PathLength > (int)DynType->PathLength && !isBaseClassPublic(getBaseClassType(Ptr.Designator, PathLength - 1), Class)) return RuntimeCheckFailed(nullptr); } // Runtime check, phase 2: // Search the dynamic type for an unambiguous public base of type C. CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, /*DetectVirtual=*/false); if (DynType->Type->isDerivedFrom(C, Paths) && !Paths.isAmbiguous(CQT) && Paths.front().Access == AS_public) { // Downcast to the dynamic type... if (!CastToDerivedClass(Info, E, Ptr, DynType->Type, DynType->PathLength)) return false; // ... then upcast to the chosen base class subobject. for (CXXBasePathElement &Elem : Paths.front()) if (!HandleLValueBase(Info, E, Ptr, Elem.Class, Elem.Base)) return false; return true; } // Otherwise, the runtime check fails. return RuntimeCheckFailed(&Paths); } namespace { struct StartLifetimeOfUnionMemberHandler { EvalInfo &Info; const Expr *LHSExpr; const FieldDecl *Field; bool DuringInit; bool Failed = false; static const AccessKinds AccessKind = AK_Assign; typedef bool result_type; bool failed() { return Failed; } bool found(APValue &Subobj, QualType SubobjType) { // We are supposed to perform no initialization but begin the lifetime of // the object. We interpret that as meaning to do what default // initialization of the object would do if all constructors involved were // trivial: // * All base, non-variant member, and array element subobjects' lifetimes // begin // * No variant members' lifetimes begin // * All scalar subobjects whose lifetimes begin have indeterminate values assert(SubobjType->isUnionType()); if (declaresSameEntity(Subobj.getUnionField(), Field)) { // This union member is already active. If it's also in-lifetime, there's // nothing to do. if (Subobj.getUnionValue().hasValue()) return true; } else if (DuringInit) { // We're currently in the process of initializing a different union // member. If we carried on, that initialization would attempt to // store to an inactive union member, resulting in undefined behavior. Info.FFDiag(LHSExpr, diag::note_constexpr_union_member_change_during_init); return false; } APValue Result; Failed = !getDefaultInitValue(Field->getType(), Result); Subobj.setUnion(Field, Result); return true; } bool found(APSInt &Value, QualType SubobjType) { llvm_unreachable("wrong value kind for union object"); } bool found(APFloat &Value, QualType SubobjType) { llvm_unreachable("wrong value kind for union object"); } }; } // end anonymous namespace const AccessKinds StartLifetimeOfUnionMemberHandler::AccessKind; /// Handle a builtin simple-assignment or a call to a trivial assignment /// operator whose left-hand side might involve a union member access. If it /// does, implicitly start the lifetime of any accessed union elements per /// C++20 [class.union]5. 
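///
/// Illustration only (not part of the original source) of the C++20 rule
/// being implemented here:
///
///   union U { int a; int b; };
///   constexpr int f() {
///     U u = {1};      // active member is 'a'
///     u.b = 2;        // assignment implicitly starts the lifetime of 'b'
///     return u.b;
///   }
///   static_assert(f() == 2);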
static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr, const LValue &LHS) { if (LHS.InvalidBase || LHS.Designator.Invalid) return false; llvm::SmallVector, 4> UnionPathLengths; // C++ [class.union]p5: // define the set S(E) of subexpressions of E as follows: unsigned PathLength = LHS.Designator.Entries.size(); for (const Expr *E = LHSExpr; E != nullptr;) { // -- If E is of the form A.B, S(E) contains the elements of S(A)... if (auto *ME = dyn_cast(E)) { auto *FD = dyn_cast(ME->getMemberDecl()); // Note that we can't implicitly start the lifetime of a reference, // so we don't need to proceed any further if we reach one. if (!FD || FD->getType()->isReferenceType()) break; // ... and also contains A.B if B names a union member ... if (FD->getParent()->isUnion()) { // ... of a non-class, non-array type, or of a class type with a // trivial default constructor that is not deleted, or an array of // such types. auto *RD = FD->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); if (!RD || RD->hasTrivialDefaultConstructor()) UnionPathLengths.push_back({PathLength - 1, FD}); } E = ME->getBase(); --PathLength; assert(declaresSameEntity(FD, LHS.Designator.Entries[PathLength] .getAsBaseOrMember().getPointer())); // -- If E is of the form A[B] and is interpreted as a built-in array // subscripting operator, S(E) is [S(the array operand, if any)]. } else if (auto *ASE = dyn_cast(E)) { // Step over an ArrayToPointerDecay implicit cast. auto *Base = ASE->getBase()->IgnoreImplicit(); if (!Base->getType()->isArrayType()) break; E = Base; --PathLength; } else if (auto *ICE = dyn_cast(E)) { // Step over a derived-to-base conversion. E = ICE->getSubExpr(); if (ICE->getCastKind() == CK_NoOp) continue; if (ICE->getCastKind() != CK_DerivedToBase && ICE->getCastKind() != CK_UncheckedDerivedToBase) break; // Walk path backwards as we walk up from the base to the derived class. for (const CXXBaseSpecifier *Elt : llvm::reverse(ICE->path())) { --PathLength; (void)Elt; assert(declaresSameEntity(Elt->getType()->getAsCXXRecordDecl(), LHS.Designator.Entries[PathLength] .getAsBaseOrMember().getPointer())); } // -- Otherwise, S(E) is empty. } else { break; } } // Common case: no unions' lifetimes are started. if (UnionPathLengths.empty()) return true; // if modification of X [would access an inactive union member], an object // of the type of X is implicitly created CompleteObject Obj = findCompleteObject(Info, LHSExpr, AK_Assign, LHS, LHSExpr->getType()); if (!Obj) return false; for (std::pair LengthAndField : llvm::reverse(UnionPathLengths)) { // Form a designator for the union object. SubobjectDesignator D = LHS.Designator; D.truncate(Info.Ctx, LHS.Base, LengthAndField.first); bool DuringInit = Info.isEvaluatingCtorDtor(LHS.Base, D.Entries) == ConstructionPhase::AfterBases; StartLifetimeOfUnionMemberHandler StartLifetime{ Info, LHSExpr, LengthAndField.second, DuringInit}; if (!findSubobject(Info, LHSExpr, Obj, D, StartLifetime)) return false; } return true; } namespace { typedef SmallVector ArgVector; } /// EvaluateArgs - Evaluate the arguments to a function call. 
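///
/// Besides producing an APValue for each argument, this also enforces the
/// 'nonnull' attribute. For example (illustrative; hypothetical declaration):
///
///   [[gnu::nonnull]] constexpr int probe(const int *) { return 0; }
///   constexpr int k = probe(nullptr);   // diagnosed: null argument to a
///                                       // parameter declared 'nonnull'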
static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, EvalInfo &Info, const FunctionDecl *Callee) { bool Success = true; llvm::SmallBitVector ForbiddenNullArgs; if (Callee->hasAttr()) { ForbiddenNullArgs.resize(Args.size()); for (const auto *Attr : Callee->specific_attrs()) { if (!Attr->args_size()) { ForbiddenNullArgs.set(); break; } else for (auto Idx : Attr->args()) { unsigned ASTIdx = Idx.getASTIndex(); if (ASTIdx >= Args.size()) continue; ForbiddenNullArgs[ASTIdx] = 1; } } } // FIXME: This is the wrong evaluation order for an assignment operator // called via operator syntax. for (unsigned Idx = 0; Idx < Args.size(); Idx++) { if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. if (!Info.noteFailure()) return false; Success = false; } else if (!ForbiddenNullArgs.empty() && ForbiddenNullArgs[Idx] && ArgValues[Idx].isLValue() && ArgValues[Idx].isNullPointer()) { Info.CCEDiag(Args[Idx], diag::note_non_null_attribute_failed); if (!Info.noteFailure()) return false; Success = false; } } return Success; } /// Evaluate a function call. static bool HandleFunctionCall(SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, ArrayRef Args, const Stmt *Body, EvalInfo &Info, APValue &Result, const LValue *ResultSlot) { ArgVector ArgValues(Args.size()); if (!EvaluateArgs(Args, ArgValues, Info, Callee)) return false; if (!Info.CheckCallLimit(CallLoc)) return false; CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data()); // For a trivial copy or move assignment, perform an APValue copy. This is // essential for unions, where the operations performed by the assignment // operator cannot be represented as statements. // // Skip this for non-union classes with no fields; in that case, the defaulted // copy/move does not actually read the object. const CXXMethodDecl *MD = dyn_cast(Callee); if (MD && MD->isDefaulted() && (MD->getParent()->isUnion() || (MD->isTrivial() && isReadByLvalueToRvalueConversion(MD->getParent())))) { assert(This && (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())); LValue RHS; RHS.setFrom(Info.Ctx, ArgValues[0]); APValue RHSValue; if (!handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(), RHS, RHSValue, MD->getParent()->isUnion())) return false; if (Info.getLangOpts().CPlusPlus20 && MD->isTrivial() && !HandleUnionActiveMemberChange(Info, Args[0], *This)) return false; if (!handleAssignment(Info, Args[0], *This, MD->getThisType(), RHSValue)) return false; This->moveInto(Result); return true; } else if (MD && isLambdaCallOperator(MD)) { // We're in a lambda; determine the lambda capture field maps unless we're // just constexpr checking a lambda's call operator. constexpr checking is // done before the captures have been added to the closure object (unless // we're inferring constexpr-ness), so we don't have access to them in this // case. But since we don't need the captures to constexpr check, we can // just ignore them. if (!Info.checkingPotentialConstantExpression()) MD->getParent()->getCaptureFields(Frame.LambdaCaptureFields, Frame.LambdaThisCaptureField); } StmtResult Ret = {Result, ResultSlot}; EvalStmtResult ESR = EvaluateStmt(Ret, Info, Body); if (ESR == ESR_Succeeded) { if (Callee->getReturnType()->isVoidType()) return true; Info.FFDiag(Callee->getEndLoc(), diag::note_constexpr_no_return); } return ESR == ESR_Returned; } /// Evaluate a constructor call. 
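///
/// This covers delegating constructors as well as base and member
/// initializers. For example (an illustrative sketch):
///
///   struct P {
///     int x, y;
///     constexpr P() : P(1, 2) {}            // delegating constructor
///     constexpr P(int x, int y) : x(x), y(y) {}
///   };
///   static_assert(P().x == 1 && P().y == 2);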
static bool HandleConstructorCall(const Expr *E, const LValue &This, APValue *ArgValues, const CXXConstructorDecl *Definition, EvalInfo &Info, APValue &Result) { SourceLocation CallLoc = E->getExprLoc(); if (!Info.CheckCallLimit(CallLoc)) return false; const CXXRecordDecl *RD = Definition->getParent(); if (RD->getNumVBases()) { Info.FFDiag(CallLoc, diag::note_constexpr_virtual_base) << RD; return false; } EvalInfo::EvaluatingConstructorRAII EvalObj( Info, ObjectUnderConstruction{This.getLValueBase(), This.Designator.Entries}, RD->getNumBases()); CallStackFrame Frame(Info, CallLoc, Definition, &This, ArgValues); // FIXME: Creating an APValue just to hold a nonexistent return value is // wasteful. APValue RetVal; StmtResult Ret = {RetVal, nullptr}; // If it's a delegating constructor, delegate. if (Definition->isDelegatingConstructor()) { CXXConstructorDecl::init_const_iterator I = Definition->init_begin(); { FullExpressionRAII InitScope(Info); if (!EvaluateInPlace(Result, Info, This, (*I)->getInit()) || !InitScope.destroy()) return false; } return EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed; } // For a trivial copy or move constructor, perform an APValue copy. This is // essential for unions (or classes with anonymous union members), where the // operations performed by the constructor cannot be represented by // ctor-initializers. // // Skip this for empty non-union classes; we should not perform an // lvalue-to-rvalue conversion on them because their copy constructor does not // actually read them. if (Definition->isDefaulted() && Definition->isCopyOrMoveConstructor() && (Definition->getParent()->isUnion() || (Definition->isTrivial() && isReadByLvalueToRvalueConversion(Definition->getParent())))) { LValue RHS; RHS.setFrom(Info.Ctx, ArgValues[0]); return handleLValueToRValueConversion( Info, E, Definition->getParamDecl(0)->getType().getNonReferenceType(), RHS, Result, Definition->getParent()->isUnion()); } // Reserve space for the struct members. if (!Result.hasValue()) { if (!RD->isUnion()) Result = APValue(APValue::UninitStruct(), RD->getNumBases(), std::distance(RD->field_begin(), RD->field_end())); else // A union starts with no active member. Result = APValue((const FieldDecl*)nullptr); } if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); // A scope for temporaries lifetime-extended by reference members. BlockScopeRAII LifetimeExtendedScope(Info); bool Success = true; unsigned BasesSeen = 0; #ifndef NDEBUG CXXRecordDecl::base_class_const_iterator BaseIt = RD->bases_begin(); #endif CXXRecordDecl::field_iterator FieldIt = RD->field_begin(); auto SkipToField = [&](FieldDecl *FD, bool Indirect) { // We might be initializing the same field again if this is an indirect // field initialization. if (FieldIt == RD->field_end() || FieldIt->getFieldIndex() > FD->getFieldIndex()) { assert(Indirect && "fields out of order?"); return; } // Default-initialize any fields with no explicit initializer. for (; !declaresSameEntity(*FieldIt, FD); ++FieldIt) { assert(FieldIt != RD->field_end() && "missing field?"); if (!FieldIt->isUnnamedBitfield()) Success &= getDefaultInitValue( FieldIt->getType(), Result.getStructField(FieldIt->getFieldIndex())); } ++FieldIt; }; for (const auto *I : Definition->inits()) { LValue Subobject = This; LValue SubobjectParent = This; APValue *Value = &Result; // Determine the subobject to initialize. 
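    // An initializer can name a direct base, a direct member, or (via an
    // IndirectFieldDecl) a member of an anonymous struct or union. For
    // example (illustrative):
    //
    //   struct S {
    //     union { int a; float b; };   // anonymous union
    //     constexpr S() : a(1) {}      // indirect field initializer for 'a'
    //   };
    //   static_assert(S().a == 1);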
FieldDecl *FD = nullptr; if (I->isBaseInitializer()) { QualType BaseType(I->getBaseClass(), 0); #ifndef NDEBUG // Non-virtual base classes are initialized in the order in the class // definition. We have already checked for virtual base classes. assert(!BaseIt->isVirtual() && "virtual base for literal type"); assert(Info.Ctx.hasSameType(BaseIt->getType(), BaseType) && "base class initializers not in expected order"); ++BaseIt; #endif if (!HandleLValueDirectBase(Info, I->getInit(), Subobject, RD, BaseType->getAsCXXRecordDecl(), &Layout)) return false; Value = &Result.getStructBase(BasesSeen++); } else if ((FD = I->getMember())) { if (!HandleLValueMember(Info, I->getInit(), Subobject, FD, &Layout)) return false; if (RD->isUnion()) { Result = APValue(FD); Value = &Result.getUnionValue(); } else { SkipToField(FD, false); Value = &Result.getStructField(FD->getFieldIndex()); } } else if (IndirectFieldDecl *IFD = I->getIndirectMember()) { // Walk the indirect field decl's chain to find the object to initialize, // and make sure we've initialized every step along it. auto IndirectFieldChain = IFD->chain(); for (auto *C : IndirectFieldChain) { FD = cast(C); CXXRecordDecl *CD = cast(FD->getParent()); // Switch the union field if it differs. This happens if we had // preceding zero-initialization, and we're now initializing a union // subobject other than the first. // FIXME: In this case, the values of the other subobjects are // specified, since zero-initialization sets all padding bits to zero. if (!Value->hasValue() || (Value->isUnion() && Value->getUnionField() != FD)) { if (CD->isUnion()) *Value = APValue(FD); else // FIXME: This immediately starts the lifetime of all members of // an anonymous struct. It would be preferable to strictly start // member lifetime in initialization order. Success &= getDefaultInitValue(Info.Ctx.getRecordType(CD), *Value); } // Store Subobject as its parent before updating it for the last element // in the chain. if (C == IndirectFieldChain.back()) SubobjectParent = Subobject; if (!HandleLValueMember(Info, I->getInit(), Subobject, FD)) return false; if (CD->isUnion()) Value = &Value->getUnionValue(); else { if (C == IndirectFieldChain.front() && !RD->isUnion()) SkipToField(FD, true); Value = &Value->getStructField(FD->getFieldIndex()); } } } else { llvm_unreachable("unknown base initializer kind"); } // Need to override This for implicit field initializers as in this case // This refers to innermost anonymous struct/union containing initializer, // not to currently constructed class. const Expr *Init = I->getInit(); ThisOverrideRAII ThisOverride(*Info.CurrentCall, &SubobjectParent, isa(Init)); FullExpressionRAII InitScope(Info); if (!EvaluateInPlace(*Value, Info, Subobject, Init) || (FD && FD->isBitField() && !truncateBitfieldValue(Info, Init, *Value, FD))) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. if (!Info.noteFailure()) return false; Success = false; } // This is the point at which the dynamic type of the object becomes this // class type. if (I->isBaseInitializer() && BasesSeen == RD->getNumBases()) EvalObj.finishedConstructingBases(); } // Default-initialize any remaining fields. 
if (!RD->isUnion()) { for (; FieldIt != RD->field_end(); ++FieldIt) { if (!FieldIt->isUnnamedBitfield()) Success &= getDefaultInitValue( FieldIt->getType(), Result.getStructField(FieldIt->getFieldIndex())); } } EvalObj.finishedConstructingFields(); return Success && EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed && LifetimeExtendedScope.destroy(); } static bool HandleConstructorCall(const Expr *E, const LValue &This, ArrayRef Args, const CXXConstructorDecl *Definition, EvalInfo &Info, APValue &Result) { ArgVector ArgValues(Args.size()); if (!EvaluateArgs(Args, ArgValues, Info, Definition)) return false; return HandleConstructorCall(E, This, ArgValues.data(), Definition, Info, Result); } static bool HandleDestructionImpl(EvalInfo &Info, SourceLocation CallLoc, const LValue &This, APValue &Value, QualType T) { // Objects can only be destroyed while they're within their lifetimes. // FIXME: We have no representation for whether an object of type nullptr_t // is in its lifetime; it usually doesn't matter. Perhaps we should model it // as indeterminate instead? if (Value.isAbsent() && !T->isNullPtrType()) { APValue Printable; This.moveInto(Printable); Info.FFDiag(CallLoc, diag::note_constexpr_destroy_out_of_lifetime) << Printable.getAsString(Info.Ctx, Info.Ctx.getLValueReferenceType(T)); return false; } // Invent an expression for location purposes. // FIXME: We shouldn't need to do this. OpaqueValueExpr LocE(CallLoc, Info.Ctx.IntTy, VK_RValue); // For arrays, destroy elements right-to-left. if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(T)) { uint64_t Size = CAT->getSize().getZExtValue(); QualType ElemT = CAT->getElementType(); LValue ElemLV = This; ElemLV.addArray(Info, &LocE, CAT); if (!HandleLValueArrayAdjustment(Info, &LocE, ElemLV, ElemT, Size)) return false; // Ensure that we have actual array elements available to destroy; the // destructors might mutate the value, so we can't run them on the array // filler. if (Size && Size > Value.getArrayInitializedElts()) expandArray(Value, Value.getArraySize() - 1); for (; Size != 0; --Size) { APValue &Elem = Value.getArrayInitializedElt(Size - 1); if (!HandleLValueArrayAdjustment(Info, &LocE, ElemLV, ElemT, -1) || !HandleDestructionImpl(Info, CallLoc, ElemLV, Elem, ElemT)) return false; } // End the lifetime of this array now. Value = APValue(); return true; } const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); if (!RD) { if (T.isDestructedType()) { Info.FFDiag(CallLoc, diag::note_constexpr_unsupported_destruction) << T; return false; } Value = APValue(); return true; } if (RD->getNumVBases()) { Info.FFDiag(CallLoc, diag::note_constexpr_virtual_base) << RD; return false; } const CXXDestructorDecl *DD = RD->getDestructor(); if (!DD && !RD->hasTrivialDestructor()) { Info.FFDiag(CallLoc); return false; } if (!DD || DD->isTrivial() || (RD->isAnonymousStructOrUnion() && RD->isUnion())) { // A trivial destructor just ends the lifetime of the object. Check for // this case before checking for a body, because we might not bother // building a body for a trivial destructor. Note that it doesn't matter // whether the destructor is constexpr in this case; all trivial // destructors are constexpr. // // If an anonymous union would be destroyed, some enclosing destructor must // have been explicitly defined, and the anonymous union destruction should // have no effect. 
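    // For instance (illustrative), destroying a local of type
    //
    //   struct Trivial { int x; };
    //
    // at the end of a constexpr function simply marks the object as being
    // outside its lifetime; no destructor body is evaluated.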
Value = APValue(); return true; } if (!Info.CheckCallLimit(CallLoc)) return false; const FunctionDecl *Definition = nullptr; const Stmt *Body = DD->getBody(Definition); if (!CheckConstexprFunction(Info, CallLoc, DD, Definition, Body)) return false; CallStackFrame Frame(Info, CallLoc, Definition, &This, nullptr); // We're now in the period of destruction of this object. unsigned BasesLeft = RD->getNumBases(); EvalInfo::EvaluatingDestructorRAII EvalObj( Info, ObjectUnderConstruction{This.getLValueBase(), This.Designator.Entries}); if (!EvalObj.DidInsert) { // C++2a [class.dtor]p19: // the behavior is undefined if the destructor is invoked for an object // whose lifetime has ended // (Note that formally the lifetime ends when the period of destruction // begins, even though certain uses of the object remain valid until the // period of destruction ends.) Info.FFDiag(CallLoc, diag::note_constexpr_double_destroy); return false; } // FIXME: Creating an APValue just to hold a nonexistent return value is // wasteful. APValue RetVal; StmtResult Ret = {RetVal, nullptr}; if (EvaluateStmt(Ret, Info, Definition->getBody()) == ESR_Failed) return false; // A union destructor does not implicitly destroy its members. if (RD->isUnion()) return true; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); // We don't have a good way to iterate fields in reverse, so collect all the // fields first and then walk them backwards. SmallVector Fields(RD->field_begin(), RD->field_end()); for (const FieldDecl *FD : llvm::reverse(Fields)) { if (FD->isUnnamedBitfield()) continue; LValue Subobject = This; if (!HandleLValueMember(Info, &LocE, Subobject, FD, &Layout)) return false; APValue *SubobjectValue = &Value.getStructField(FD->getFieldIndex()); if (!HandleDestructionImpl(Info, CallLoc, Subobject, *SubobjectValue, FD->getType())) return false; } if (BasesLeft != 0) EvalObj.startedDestroyingBases(); // Destroy base classes in reverse order. for (const CXXBaseSpecifier &Base : llvm::reverse(RD->bases())) { --BasesLeft; QualType BaseType = Base.getType(); LValue Subobject = This; if (!HandleLValueDirectBase(Info, &LocE, Subobject, RD, BaseType->getAsCXXRecordDecl(), &Layout)) return false; APValue *SubobjectValue = &Value.getStructBase(BasesLeft); if (!HandleDestructionImpl(Info, CallLoc, Subobject, *SubobjectValue, BaseType)) return false; } assert(BasesLeft == 0 && "NumBases was wrong?"); // The period of destruction ends now. The object is gone. Value = APValue(); return true; } namespace { struct DestroyObjectHandler { EvalInfo &Info; const Expr *E; const LValue &This; const AccessKinds AccessKind; typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { return HandleDestructionImpl(Info, E->getExprLoc(), This, Subobj, SubobjType); } bool found(APSInt &Value, QualType SubobjType) { Info.FFDiag(E, diag::note_constexpr_destroy_complex_elem); return false; } bool found(APFloat &Value, QualType SubobjType) { Info.FFDiag(E, diag::note_constexpr_destroy_complex_elem); return false; } }; } /// Perform a destructor or pseudo-destructor call on the given object, which /// might in general not be a complete object. 
static bool HandleDestruction(EvalInfo &Info, const Expr *E, const LValue &This, QualType ThisType) { CompleteObject Obj = findCompleteObject(Info, E, AK_Destroy, This, ThisType); DestroyObjectHandler Handler = {Info, E, This, AK_Destroy}; return Obj && findSubobject(Info, E, Obj, This.Designator, Handler); } /// Destroy and end the lifetime of the given complete object. static bool HandleDestruction(EvalInfo &Info, SourceLocation Loc, APValue::LValueBase LVBase, APValue &Value, QualType T) { // If we've had an unmodeled side-effect, we can't rely on mutable state // (such as the object we're about to destroy) being correct. if (Info.EvalStatus.HasSideEffects) return false; LValue LV; LV.set({LVBase}); return HandleDestructionImpl(Info, Loc, LV, Value, T); } /// Perform a call to 'perator new' or to `__builtin_operator_new'. static bool HandleOperatorNewCall(EvalInfo &Info, const CallExpr *E, LValue &Result) { if (Info.checkingPotentialConstantExpression() || Info.SpeculativeEvaluationDepth) return false; // This is permitted only within a call to std::allocator::allocate. auto Caller = Info.getStdAllocatorCaller("allocate"); if (!Caller) { Info.FFDiag(E->getExprLoc(), Info.getLangOpts().CPlusPlus20 ? diag::note_constexpr_new_untyped : diag::note_constexpr_new); return false; } QualType ElemType = Caller.ElemType; if (ElemType->isIncompleteType() || ElemType->isFunctionType()) { Info.FFDiag(E->getExprLoc(), diag::note_constexpr_new_not_complete_object_type) << (ElemType->isIncompleteType() ? 0 : 1) << ElemType; return false; } APSInt ByteSize; if (!EvaluateInteger(E->getArg(0), ByteSize, Info)) return false; bool IsNothrow = false; for (unsigned I = 1, N = E->getNumArgs(); I != N; ++I) { EvaluateIgnoredValue(Info, E->getArg(I)); IsNothrow |= E->getType()->isNothrowT(); } CharUnits ElemSize; if (!HandleSizeof(Info, E->getExprLoc(), ElemType, ElemSize)) return false; APInt Size, Remainder; APInt ElemSizeAP(ByteSize.getBitWidth(), ElemSize.getQuantity()); APInt::udivrem(ByteSize, ElemSizeAP, Size, Remainder); if (Remainder != 0) { // This likely indicates a bug in the implementation of 'std::allocator'. Info.FFDiag(E->getExprLoc(), diag::note_constexpr_operator_new_bad_size) << ByteSize << APSInt(ElemSizeAP, true) << ElemType; return false; } if (ByteSize.getActiveBits() > ConstantArrayType::getMaxSizeBits(Info.Ctx)) { if (IsNothrow) { Result.setNull(Info.Ctx, E->getType()); return true; } Info.FFDiag(E, diag::note_constexpr_new_too_large) << APSInt(Size, true); return false; } QualType AllocType = Info.Ctx.getConstantArrayType(ElemType, Size, nullptr, ArrayType::Normal, 0); APValue *Val = Info.createHeapAlloc(E, AllocType, Result); *Val = APValue(APValue::UninitArray(), 0, Size.getZExtValue()); Result.addArray(Info, E, cast(AllocType)); return true; } static bool hasVirtualDestructor(QualType T) { if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) if (CXXDestructorDecl *DD = RD->getDestructor()) return DD->isVirtual(); return false; } static const FunctionDecl *getVirtualOperatorDelete(QualType T) { if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) if (CXXDestructorDecl *DD = RD->getDestructor()) return DD->isVirtual() ? DD->getOperatorDelete() : nullptr; return nullptr; } /// Check that the given object is a suitable pointer to a heap allocation that /// still exists and is of the right kind for the purpose of a deletion. /// /// On success, returns the heap allocation to deallocate. On failure, produces /// a diagnostic and returns None. 
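///
/// For example (an illustrative C++20 sketch):
///
///   constexpr bool f() {
///     int *p = new int[3];
///     delete p;          // array allocation deleted with non-array 'delete'
///     return true;
///   }
///
/// is rejected here because the allocation and deallocation forms do not
/// match.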
static Optional CheckDeleteKind(EvalInfo &Info, const Expr *E, const LValue &Pointer, DynAlloc::Kind DeallocKind) { auto PointerAsString = [&] { return Pointer.toString(Info.Ctx, Info.Ctx.VoidPtrTy); }; DynamicAllocLValue DA = Pointer.Base.dyn_cast(); if (!DA) { Info.FFDiag(E, diag::note_constexpr_delete_not_heap_alloc) << PointerAsString(); if (Pointer.Base) NoteLValueLocation(Info, Pointer.Base); return None; } Optional Alloc = Info.lookupDynamicAlloc(DA); if (!Alloc) { Info.FFDiag(E, diag::note_constexpr_double_delete); return None; } QualType AllocType = Pointer.Base.getDynamicAllocType(); if (DeallocKind != (*Alloc)->getKind()) { Info.FFDiag(E, diag::note_constexpr_new_delete_mismatch) << DeallocKind << (*Alloc)->getKind() << AllocType; NoteLValueLocation(Info, Pointer.Base); return None; } bool Subobject = false; if (DeallocKind == DynAlloc::New) { Subobject = Pointer.Designator.MostDerivedPathLength != 0 || Pointer.Designator.isOnePastTheEnd(); } else { Subobject = Pointer.Designator.Entries.size() != 1 || Pointer.Designator.Entries[0].getAsArrayIndex() != 0; } if (Subobject) { Info.FFDiag(E, diag::note_constexpr_delete_subobject) << PointerAsString() << Pointer.Designator.isOnePastTheEnd(); return None; } return Alloc; } // Perform a call to 'operator delete' or '__builtin_operator_delete'. bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { if (Info.checkingPotentialConstantExpression() || Info.SpeculativeEvaluationDepth) return false; // This is permitted only within a call to std::allocator::deallocate. if (!Info.getStdAllocatorCaller("deallocate")) { Info.FFDiag(E->getExprLoc()); return true; } LValue Pointer; if (!EvaluatePointer(E->getArg(0), Pointer, Info)) return false; for (unsigned I = 1, N = E->getNumArgs(); I != N; ++I) EvaluateIgnoredValue(Info, E->getArg(I)); if (Pointer.Designator.Invalid) return false; // Deleting a null pointer has no effect. if (Pointer.isNullPointer()) return true; if (!CheckDeleteKind(Info, E, Pointer, DynAlloc::StdAllocator)) return false; Info.HeapAllocs.erase(Pointer.Base.get()); return true; } //===----------------------------------------------------------------------===// // Generic Evaluation //===----------------------------------------------------------------------===// namespace { class BitCastBuffer { // FIXME: We're going to need bit-level granularity when we support // bit-fields. // FIXME: Its possible under the C++ standard for 'char' to not be 8 bits, but // we don't support a host or target where that is the case. Still, we should // use a more generic type in case we ever do. SmallVector, 32> Bytes; static_assert(std::numeric_limits::digits >= 8, "Need at least 8 bit unsigned char"); bool TargetIsLittleEndian; public: BitCastBuffer(CharUnits Width, bool TargetIsLittleEndian) : Bytes(Width.getQuantity()), TargetIsLittleEndian(TargetIsLittleEndian) {} LLVM_NODISCARD bool readObject(CharUnits Offset, CharUnits Width, SmallVectorImpl &Output) const { for (CharUnits I = Offset, E = Offset + Width; I != E; ++I) { // If a byte of an integer is uninitialized, then the whole integer is // uninitalized. 
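      // For example (illustrative; assumes a typical 64-bit layout), in
      //
      //   struct Padded { char c; int i; };
      //   constexpr auto v =
      //       __builtin_bit_cast(unsigned long long, Padded{1, 2});
      //
      // the bytes overlapping Padded's internal padding were never written,
      // so this read fails and the caller reports that the destination type
      // cannot hold an indeterminate value.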
if (!Bytes[I.getQuantity()]) return false; Output.push_back(*Bytes[I.getQuantity()]); } if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian) std::reverse(Output.begin(), Output.end()); return true; } void writeObject(CharUnits Offset, SmallVectorImpl &Input) { if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian) std::reverse(Input.begin(), Input.end()); size_t Index = 0; for (unsigned char Byte : Input) { assert(!Bytes[Offset.getQuantity() + Index] && "overwriting a byte?"); Bytes[Offset.getQuantity() + Index] = Byte; ++Index; } } size_t size() { return Bytes.size(); } }; /// Traverse an APValue to produce an BitCastBuffer, emulating how the current /// target would represent the value at runtime. class APValueToBufferConverter { EvalInfo &Info; BitCastBuffer Buffer; const CastExpr *BCE; APValueToBufferConverter(EvalInfo &Info, CharUnits ObjectWidth, const CastExpr *BCE) : Info(Info), Buffer(ObjectWidth, Info.Ctx.getTargetInfo().isLittleEndian()), BCE(BCE) {} bool visit(const APValue &Val, QualType Ty) { return visit(Val, Ty, CharUnits::fromQuantity(0)); } // Write out Val with type Ty into Buffer starting at Offset. bool visit(const APValue &Val, QualType Ty, CharUnits Offset) { assert((size_t)Offset.getQuantity() <= Buffer.size()); // As a special case, nullptr_t has an indeterminate value. if (Ty->isNullPtrType()) return true; // Dig through Src to find the byte at SrcOffset. switch (Val.getKind()) { case APValue::Indeterminate: case APValue::None: return true; case APValue::Int: return visitInt(Val.getInt(), Ty, Offset); case APValue::Float: return visitFloat(Val.getFloat(), Ty, Offset); case APValue::Array: return visitArray(Val, Ty, Offset); case APValue::Struct: return visitRecord(Val, Ty, Offset); case APValue::ComplexInt: case APValue::ComplexFloat: case APValue::Vector: case APValue::FixedPoint: // FIXME: We should support these. case APValue::Union: case APValue::MemberPointer: case APValue::AddrLabelDiff: { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_type) << Ty; return false; } case APValue::LValue: llvm_unreachable("LValue subobject in bit_cast?"); } llvm_unreachable("Unhandled APValue::ValueKind"); } bool visitRecord(const APValue &Val, QualType Ty, CharUnits Offset) { const RecordDecl *RD = Ty->getAsRecordDecl(); const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); // Visit the base classes. if (auto *CXXRD = dyn_cast(RD)) { for (size_t I = 0, E = CXXRD->getNumBases(); I != E; ++I) { const CXXBaseSpecifier &BS = CXXRD->bases_begin()[I]; CXXRecordDecl *BaseDecl = BS.getType()->getAsCXXRecordDecl(); if (!visitRecord(Val.getStructBase(I), BS.getType(), Layout.getBaseClassOffset(BaseDecl) + Offset)) return false; } } // Visit the fields. 
unsigned FieldIdx = 0; for (FieldDecl *FD : RD->fields()) { if (FD->isBitField()) { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_bitfield); return false; } uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx); assert(FieldOffsetBits % Info.Ctx.getCharWidth() == 0 && "only bit-fields can have sub-char alignment"); CharUnits FieldOffset = Info.Ctx.toCharUnitsFromBits(FieldOffsetBits) + Offset; QualType FieldTy = FD->getType(); if (!visit(Val.getStructField(FieldIdx), FieldTy, FieldOffset)) return false; ++FieldIdx; } return true; } bool visitArray(const APValue &Val, QualType Ty, CharUnits Offset) { const auto *CAT = dyn_cast_or_null(Ty->getAsArrayTypeUnsafe()); if (!CAT) return false; CharUnits ElemWidth = Info.Ctx.getTypeSizeInChars(CAT->getElementType()); unsigned NumInitializedElts = Val.getArrayInitializedElts(); unsigned ArraySize = Val.getArraySize(); // First, initialize the initialized elements. for (unsigned I = 0; I != NumInitializedElts; ++I) { const APValue &SubObj = Val.getArrayInitializedElt(I); if (!visit(SubObj, CAT->getElementType(), Offset + I * ElemWidth)) return false; } // Next, initialize the rest of the array using the filler. if (Val.hasArrayFiller()) { const APValue &Filler = Val.getArrayFiller(); for (unsigned I = NumInitializedElts; I != ArraySize; ++I) { if (!visit(Filler, CAT->getElementType(), Offset + I * ElemWidth)) return false; } } return true; } bool visitInt(const APSInt &Val, QualType Ty, CharUnits Offset) { CharUnits Width = Info.Ctx.getTypeSizeInChars(Ty); SmallVector Bytes(Width.getQuantity()); llvm::StoreIntToMemory(Val, &*Bytes.begin(), Width.getQuantity()); Buffer.writeObject(Offset, Bytes); return true; } bool visitFloat(const APFloat &Val, QualType Ty, CharUnits Offset) { APSInt AsInt(Val.bitcastToAPInt()); return visitInt(AsInt, Ty, Offset); } public: static Optional convert(EvalInfo &Info, const APValue &Src, const CastExpr *BCE) { CharUnits DstSize = Info.Ctx.getTypeSizeInChars(BCE->getType()); APValueToBufferConverter Converter(Info, DstSize, BCE); if (!Converter.visit(Src, BCE->getSubExpr()->getType())) return None; return Converter.Buffer; } }; /// Write an BitCastBuffer into an APValue. class BufferToAPValueConverter { EvalInfo &Info; const BitCastBuffer &Buffer; const CastExpr *BCE; BufferToAPValueConverter(EvalInfo &Info, const BitCastBuffer &Buffer, const CastExpr *BCE) : Info(Info), Buffer(Buffer), BCE(BCE) {} // Emit an unsupported bit_cast type error. Sema refuses to build a bit_cast // with an invalid type, so anything left is a deficiency on our part (FIXME). // Ideally this will be unreachable. llvm::NoneType unsupportedType(QualType Ty) { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_type) << Ty; return None; } Optional visit(const BuiltinType *T, CharUnits Offset, const EnumType *EnumSugar = nullptr) { if (T->isNullPtrType()) { uint64_t NullValue = Info.Ctx.getTargetNullPointerValue(QualType(T, 0)); return APValue((Expr *)nullptr, /*Offset=*/CharUnits::fromQuantity(NullValue), APValue::NoLValuePath{}, /*IsNullPtr=*/true); } CharUnits SizeOf = Info.Ctx.getTypeSizeInChars(T); SmallVector Bytes; if (!Buffer.readObject(Offset, SizeOf, Bytes)) { // If this is std::byte or unsigned char, then its okay to store an // indeterminate value. 
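      // For example (illustrative; assumes a typical layout where 'short' is
      // 2-byte aligned):
      //
      //   struct Padded { char c; short s; };          // one padding byte
      //   struct Repr   { unsigned char bytes[4]; };
      //   constexpr Repr r = __builtin_bit_cast(Repr, Padded{1, 2});
      //
      // The element overlapping the padding byte simply becomes an
      // indeterminate value, whereas casting the same object to 'int' would
      // be diagnosed below.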
bool IsStdByte = EnumSugar && EnumSugar->isStdByteType(); bool IsUChar = !EnumSugar && (T->isSpecificBuiltinType(BuiltinType::UChar) || T->isSpecificBuiltinType(BuiltinType::Char_U)); if (!IsStdByte && !IsUChar) { QualType DisplayType(EnumSugar ? (const Type *)EnumSugar : T, 0); Info.FFDiag(BCE->getExprLoc(), diag::note_constexpr_bit_cast_indet_dest) << DisplayType << Info.Ctx.getLangOpts().CharIsSigned; return None; } return APValue::IndeterminateValue(); } APSInt Val(SizeOf.getQuantity() * Info.Ctx.getCharWidth(), true); llvm::LoadIntFromMemory(Val, &*Bytes.begin(), Bytes.size()); if (T->isIntegralOrEnumerationType()) { Val.setIsSigned(T->isSignedIntegerOrEnumerationType()); return APValue(Val); } if (T->isRealFloatingType()) { const llvm::fltSemantics &Semantics = Info.Ctx.getFloatTypeSemantics(QualType(T, 0)); return APValue(APFloat(Semantics, Val)); } return unsupportedType(QualType(T, 0)); } Optional visit(const RecordType *RTy, CharUnits Offset) { const RecordDecl *RD = RTy->getAsRecordDecl(); const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); unsigned NumBases = 0; if (auto *CXXRD = dyn_cast(RD)) NumBases = CXXRD->getNumBases(); APValue ResultVal(APValue::UninitStruct(), NumBases, std::distance(RD->field_begin(), RD->field_end())); // Visit the base classes. if (auto *CXXRD = dyn_cast(RD)) { for (size_t I = 0, E = CXXRD->getNumBases(); I != E; ++I) { const CXXBaseSpecifier &BS = CXXRD->bases_begin()[I]; CXXRecordDecl *BaseDecl = BS.getType()->getAsCXXRecordDecl(); if (BaseDecl->isEmpty() || Info.Ctx.getASTRecordLayout(BaseDecl).getNonVirtualSize().isZero()) continue; Optional SubObj = visitType( BS.getType(), Layout.getBaseClassOffset(BaseDecl) + Offset); if (!SubObj) return None; ResultVal.getStructBase(I) = *SubObj; } } // Visit the fields. unsigned FieldIdx = 0; for (FieldDecl *FD : RD->fields()) { // FIXME: We don't currently support bit-fields. A lot of the logic for // this is in CodeGen, so we need to factor it around. if (FD->isBitField()) { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_bitfield); return None; } uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx); assert(FieldOffsetBits % Info.Ctx.getCharWidth() == 0); CharUnits FieldOffset = CharUnits::fromQuantity(FieldOffsetBits / Info.Ctx.getCharWidth()) + Offset; QualType FieldTy = FD->getType(); Optional SubObj = visitType(FieldTy, FieldOffset); if (!SubObj) return None; ResultVal.getStructField(FieldIdx) = *SubObj; ++FieldIdx; } return ResultVal; } Optional visit(const EnumType *Ty, CharUnits Offset) { QualType RepresentationType = Ty->getDecl()->getIntegerType(); assert(!RepresentationType.isNull() && "enum forward decl should be caught by Sema"); const auto *AsBuiltin = RepresentationType.getCanonicalType()->castAs(); // Recurse into the underlying type. Treat std::byte transparently as // unsigned char. 
return visit(AsBuiltin, Offset, /*EnumTy=*/Ty); } Optional visit(const ConstantArrayType *Ty, CharUnits Offset) { size_t Size = Ty->getSize().getLimitedValue(); CharUnits ElementWidth = Info.Ctx.getTypeSizeInChars(Ty->getElementType()); APValue ArrayValue(APValue::UninitArray(), Size, Size); for (size_t I = 0; I != Size; ++I) { Optional ElementValue = visitType(Ty->getElementType(), Offset + I * ElementWidth); if (!ElementValue) return None; ArrayValue.getArrayInitializedElt(I) = std::move(*ElementValue); } return ArrayValue; } Optional visit(const Type *Ty, CharUnits Offset) { return unsupportedType(QualType(Ty, 0)); } Optional visitType(QualType Ty, CharUnits Offset) { QualType Can = Ty.getCanonicalType(); switch (Can->getTypeClass()) { #define TYPE(Class, Base) \ case Type::Class: \ return visit(cast(Can.getTypePtr()), Offset); #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) \ case Type::Class: \ llvm_unreachable("non-canonical type should be impossible!"); #define DEPENDENT_TYPE(Class, Base) \ case Type::Class: \ llvm_unreachable( \ "dependent types aren't supported in the constant evaluator!"); #define NON_CANONICAL_UNLESS_DEPENDENT(Class, Base) \ case Type::Class: \ llvm_unreachable("either dependent or not canonical!"); #include "clang/AST/TypeNodes.inc" } llvm_unreachable("Unhandled Type::TypeClass"); } public: // Pull out a full value of type DstType. static Optional convert(EvalInfo &Info, BitCastBuffer &Buffer, const CastExpr *BCE) { BufferToAPValueConverter Converter(Info, Buffer, BCE); return Converter.visitType(BCE->getType(), CharUnits::fromQuantity(0)); } }; static bool checkBitCastConstexprEligibilityType(SourceLocation Loc, QualType Ty, EvalInfo *Info, const ASTContext &Ctx, bool CheckingDest) { Ty = Ty.getCanonicalType(); auto diag = [&](int Reason) { if (Info) Info->FFDiag(Loc, diag::note_constexpr_bit_cast_invalid_type) << CheckingDest << (Reason == 4) << Reason; return false; }; auto note = [&](int Construct, QualType NoteTy, SourceLocation NoteLoc) { if (Info) Info->Note(NoteLoc, diag::note_constexpr_bit_cast_invalid_subtype) << NoteTy << Construct << Ty; return false; }; if (Ty->isUnionType()) return diag(0); if (Ty->isPointerType()) return diag(1); if (Ty->isMemberPointerType()) return diag(2); if (Ty.isVolatileQualified()) return diag(3); if (RecordDecl *Record = Ty->getAsRecordDecl()) { if (auto *CXXRD = dyn_cast(Record)) { for (CXXBaseSpecifier &BS : CXXRD->bases()) if (!checkBitCastConstexprEligibilityType(Loc, BS.getType(), Info, Ctx, CheckingDest)) return note(1, BS.getType(), BS.getBeginLoc()); } for (FieldDecl *FD : Record->fields()) { if (FD->getType()->isReferenceType()) return diag(4); if (!checkBitCastConstexprEligibilityType(Loc, FD->getType(), Info, Ctx, CheckingDest)) return note(0, FD->getType(), FD->getBeginLoc()); } } if (Ty->isArrayType() && !checkBitCastConstexprEligibilityType(Loc, Ctx.getBaseElementType(Ty), Info, Ctx, CheckingDest)) return false; return true; } static bool checkBitCastConstexprEligibility(EvalInfo *Info, const ASTContext &Ctx, const CastExpr *BCE) { bool DestOK = checkBitCastConstexprEligibilityType( BCE->getBeginLoc(), BCE->getType(), Info, Ctx, true); bool SourceOK = DestOK && checkBitCastConstexprEligibilityType( BCE->getBeginLoc(), BCE->getSubExpr()->getType(), Info, Ctx, false); return SourceOK; } static bool handleLValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, APValue &SourceValue, const CastExpr *BCE) { assert(CHAR_BIT == 8 && Info.Ctx.getTargetInfo().getCharWidth() == 8 && "no 
host or target supports non 8-bit chars"); assert(SourceValue.isLValue() && "LValueToRValueBitcast requires an lvalue operand!"); if (!checkBitCastConstexprEligibility(&Info, Info.Ctx, BCE)) return false; LValue SourceLValue; APValue SourceRValue; SourceLValue.setFrom(Info.Ctx, SourceValue); if (!handleLValueToRValueConversion( Info, BCE, BCE->getSubExpr()->getType().withConst(), SourceLValue, SourceRValue, /*WantObjectRepresentation=*/true)) return false; // Read out SourceValue into a char buffer. Optional Buffer = APValueToBufferConverter::convert(Info, SourceRValue, BCE); if (!Buffer) return false; // Write out the buffer into a new APValue. Optional MaybeDestValue = BufferToAPValueConverter::convert(Info, *Buffer, BCE); if (!MaybeDestValue) return false; DestValue = std::move(*MaybeDestValue); return true; } template class ExprEvaluatorBase : public ConstStmtVisitor { private: Derived &getDerived() { return static_cast(*this); } bool DerivedSuccess(const APValue &V, const Expr *E) { return getDerived().Success(V, E); } bool DerivedZeroInitialization(const Expr *E) { return getDerived().ZeroInitialization(E); } // Check whether a conditional operator with a non-constant condition is a // potential constant expression. If neither arm is a potential constant // expression, then the conditional operator is not either. template void CheckPotentialConstantConditional(const ConditionalOperator *E) { assert(Info.checkingPotentialConstantExpression()); // Speculatively evaluate both arms. SmallVector Diag; { SpeculativeEvaluationRAII Speculate(Info, &Diag); StmtVisitorTy::Visit(E->getFalseExpr()); if (Diag.empty()) return; } { SpeculativeEvaluationRAII Speculate(Info, &Diag); Diag.clear(); StmtVisitorTy::Visit(E->getTrueExpr()); if (Diag.empty()) return; } Error(E, diag::note_constexpr_conditional_never_const); } template bool HandleConditionalOperator(const ConditionalOperator *E) { bool BoolResult; if (!EvaluateAsBooleanCondition(E->getCond(), BoolResult, Info)) { if (Info.checkingPotentialConstantExpression() && Info.noteFailure()) { CheckPotentialConstantConditional(E); return false; } if (Info.noteFailure()) { StmtVisitorTy::Visit(E->getTrueExpr()); StmtVisitorTy::Visit(E->getFalseExpr()); } return false; } Expr *EvalExpr = BoolResult ? E->getTrueExpr() : E->getFalseExpr(); return StmtVisitorTy::Visit(EvalExpr); } protected: EvalInfo &Info; typedef ConstStmtVisitor StmtVisitorTy; typedef ExprEvaluatorBase ExprEvaluatorBaseTy; OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) { return Info.CCEDiag(E, D); } bool ZeroInitialization(const Expr *E) { return Error(E); } public: ExprEvaluatorBase(EvalInfo &Info) : Info(Info) {} EvalInfo &getEvalInfo() { return Info; } /// Report an evaluation error. This should only be called when an error is /// first discovered. When propagating an error, just return false. 
bool Error(const Expr *E, diag::kind D) { Info.FFDiag(E, D); return false; } bool Error(const Expr *E) { return Error(E, diag::note_invalid_subexpr_in_const_expr); } bool VisitStmt(const Stmt *) { llvm_unreachable("Expression evaluator should not be called on stmts"); } bool VisitExpr(const Expr *E) { return Error(E); } bool VisitConstantExpr(const ConstantExpr *E) { if (E->hasAPValueResult()) return DerivedSuccess(E->getAPValueResult(), E); return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitParenExpr(const ParenExpr *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitUnaryExtension(const UnaryOperator *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitUnaryPlus(const UnaryOperator *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitChooseExpr(const ChooseExpr *E) { return StmtVisitorTy::Visit(E->getChosenSubExpr()); } bool VisitGenericSelectionExpr(const GenericSelectionExpr *E) { return StmtVisitorTy::Visit(E->getResultExpr()); } bool VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTemplateParmExpr *E) { return StmtVisitorTy::Visit(E->getReplacement()); } bool VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E) { TempVersionRAII RAII(*Info.CurrentCall); SourceLocExprScopeGuard Guard(E, Info.CurrentCall->CurSourceLocExprScope); return StmtVisitorTy::Visit(E->getExpr()); } bool VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *E) { TempVersionRAII RAII(*Info.CurrentCall); // The initializer may not have been parsed yet, or might be erroneous. if (!E->getExpr()) return Error(E); SourceLocExprScopeGuard Guard(E, Info.CurrentCall->CurSourceLocExprScope); return StmtVisitorTy::Visit(E->getExpr()); } bool VisitExprWithCleanups(const ExprWithCleanups *E) { FullExpressionRAII Scope(Info); return StmtVisitorTy::Visit(E->getSubExpr()) && Scope.destroy(); } // Temporaries are registered when created, so we don't care about // CXXBindTemporaryExpr. bool VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitCXXReinterpretCastExpr(const CXXReinterpretCastExpr *E) { CCEDiag(E, diag::note_constexpr_invalid_cast) << 0; return static_cast(this)->VisitCastExpr(E); } bool VisitCXXDynamicCastExpr(const CXXDynamicCastExpr *E) { if (!Info.Ctx.getLangOpts().CPlusPlus20) CCEDiag(E, diag::note_constexpr_invalid_cast) << 1; return static_cast(this)->VisitCastExpr(E); } bool VisitBuiltinBitCastExpr(const BuiltinBitCastExpr *E) { return static_cast(this)->VisitCastExpr(E); } bool VisitBinaryOperator(const BinaryOperator *E) { switch (E->getOpcode()) { default: return Error(E); case BO_Comma: VisitIgnoredValue(E->getLHS()); return StmtVisitorTy::Visit(E->getRHS()); case BO_PtrMemD: case BO_PtrMemI: { LValue Obj; if (!HandleMemberPointerAccess(Info, E, Obj)) return false; APValue Result; if (!handleLValueToRValueConversion(Info, E, E->getType(), Obj, Result)) return false; return DerivedSuccess(Result, E); } } } bool VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *E) { return StmtVisitorTy::Visit(E->getSemanticForm()); } bool VisitBinaryConditionalOperator(const BinaryConditionalOperator *E) { // Evaluate and cache the common expression. We treat it as a temporary, // even though it's not quite the same thing. 
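    // For example (illustrative, with 'f' some constexpr function), in the
    // GNU conditional
    //
    //   constexpr int k = f() ?: 42;
    //
    // 'f()' is evaluated once here, cached as the opaque value, and then
    // reused as both the condition and, if it is non-zero, the result.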
LValue CommonLV; if (!Evaluate(Info.CurrentCall->createTemporary( E->getOpaqueValue(), getStorageType(Info.Ctx, E->getOpaqueValue()), false, CommonLV), Info, E->getCommon())) return false; return HandleConditionalOperator(E); } bool VisitConditionalOperator(const ConditionalOperator *E) { bool IsBcpCall = false; // If the condition (ignoring parens) is a __builtin_constant_p call, // the result is a constant expression if it can be folded without // side-effects. This is an important GNU extension. See GCC PR38377 // for discussion. if (const CallExpr *CallCE = dyn_cast(E->getCond()->IgnoreParenCasts())) if (CallCE->getBuiltinCallee() == Builtin::BI__builtin_constant_p) IsBcpCall = true; // Always assume __builtin_constant_p(...) ? ... : ... is a potential // constant expression; we can't check whether it's potentially foldable. // FIXME: We should instead treat __builtin_constant_p as non-constant if // it would return 'false' in this mode. if (Info.checkingPotentialConstantExpression() && IsBcpCall) return false; FoldConstant Fold(Info, IsBcpCall); if (!HandleConditionalOperator(E)) { Fold.keepDiagnostics(); return false; } return true; } bool VisitOpaqueValueExpr(const OpaqueValueExpr *E) { if (APValue *Value = Info.CurrentCall->getCurrentTemporary(E)) return DerivedSuccess(*Value, E); const Expr *Source = E->getSourceExpr(); if (!Source) return Error(E); if (Source == E) { // sanity checking. assert(0 && "OpaqueValueExpr recursively refers to itself"); return Error(E); } return StmtVisitorTy::Visit(Source); } bool VisitPseudoObjectExpr(const PseudoObjectExpr *E) { for (const Expr *SemE : E->semantics()) { if (auto *OVE = dyn_cast(SemE)) { // FIXME: We can't handle the case where an OpaqueValueExpr is also the // result expression: there could be two different LValues that would // refer to the same object in that case, and we can't model that. if (SemE == E->getResultExpr()) return Error(E); // Unique OVEs get evaluated if and when we encounter them when // emitting the rest of the semantic form, rather than eagerly. if (OVE->isUnique()) continue; LValue LV; if (!Evaluate(Info.CurrentCall->createTemporary( OVE, getStorageType(Info.Ctx, OVE), false, LV), Info, OVE->getSourceExpr())) return false; } else if (SemE == E->getResultExpr()) { if (!StmtVisitorTy::Visit(SemE)) return false; } else { if (!EvaluateIgnoredValue(Info, SemE)) return false; } } return true; } bool VisitCallExpr(const CallExpr *E) { APValue Result; if (!handleCallExpr(E, Result, nullptr)) return false; return DerivedSuccess(Result, E); } bool handleCallExpr(const CallExpr *E, APValue &Result, const LValue *ResultSlot) { const Expr *Callee = E->getCallee()->IgnoreParens(); QualType CalleeType = Callee->getType(); const FunctionDecl *FD = nullptr; LValue *This = nullptr, ThisVal; auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); bool HasQualifier = false; // Extract function decl and 'this' pointer from the callee. if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) { const CXXMethodDecl *Member = nullptr; if (const MemberExpr *ME = dyn_cast(Callee)) { // Explicit bound member calls, such as x.f() or p->g(); if (!EvaluateObjectArgument(Info, ME->getBase(), ThisVal)) return false; Member = dyn_cast(ME->getMemberDecl()); if (!Member) return Error(Callee); This = &ThisVal; HasQualifier = ME->hasQualifier(); } else if (const BinaryOperator *BE = dyn_cast(Callee)) { // Indirect bound member calls ('.*' or '->*'). 
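        // For example (illustrative):
        //
        //   struct S { constexpr int f() const { return 1; } };
        //   constexpr int call(const S &s, int (S::*pmf)() const) {
        //     return (s.*pmf)();
        //   }
        //   static_assert(call(S{}, &S::f) == 1);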
const ValueDecl *D = HandleMemberPointerAccess(Info, BE, ThisVal, false); if (!D) return false; Member = dyn_cast(D); if (!Member) return Error(Callee); This = &ThisVal; } else if (const auto *PDE = dyn_cast(Callee)) { if (!Info.getLangOpts().CPlusPlus20) Info.CCEDiag(PDE, diag::note_constexpr_pseudo_destructor); return EvaluateObjectArgument(Info, PDE->getBase(), ThisVal) && HandleDestruction(Info, PDE, ThisVal, PDE->getDestroyedType()); } else return Error(Callee); FD = Member; } else if (CalleeType->isFunctionPointerType()) { LValue Call; if (!EvaluatePointer(Callee, Call, Info)) return false; if (!Call.getLValueOffset().isZero()) return Error(Callee); FD = dyn_cast_or_null( Call.getLValueBase().dyn_cast()); if (!FD) return Error(Callee); // Don't call function pointers which have been cast to some other type. // Per DR (no number yet), the caller and callee can differ in noexcept. if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec( CalleeType->getPointeeType(), FD->getType())) { return Error(E); } // Overloaded operator calls to member functions are represented as normal // calls with '*this' as the first argument. const CXXMethodDecl *MD = dyn_cast(FD); if (MD && !MD->isStatic()) { // FIXME: When selecting an implicit conversion for an overloaded // operator delete, we sometimes try to evaluate calls to conversion // operators without a 'this' parameter! if (Args.empty()) return Error(E); if (!EvaluateObjectArgument(Info, Args[0], ThisVal)) return false; This = &ThisVal; Args = Args.slice(1); } else if (MD && MD->isLambdaStaticInvoker()) { // Map the static invoker for the lambda back to the call operator. // Conveniently, we don't have to slice out the 'this' argument (as is // being done for the non-static case), since a static member function // doesn't have an implicit argument passed in. const CXXRecordDecl *ClosureClass = MD->getParent(); assert( ClosureClass->captures_begin() == ClosureClass->captures_end() && "Number of captures must be zero for conversion to function-ptr"); const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator(); // Set 'FD', the function that will be called below, to the call // operator. If the closure object represents a generic lambda, find // the corresponding specialization of the call operator. if (ClosureClass->isGenericLambda()) { assert(MD->isFunctionTemplateSpecialization() && "A generic lambda's static-invoker function must be a " "template specialization"); const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs(); FunctionTemplateDecl *CallOpTemplate = LambdaCallOp->getDescribedFunctionTemplate(); void *InsertPos = nullptr; FunctionDecl *CorrespondingCallOpSpecialization = CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos); assert(CorrespondingCallOpSpecialization && "We must always have a function call operator specialization " "that corresponds to our static invoker specialization"); FD = cast(CorrespondingCallOpSpecialization); } else FD = LambdaCallOp; } else if (FD->isReplaceableGlobalAllocationFunction()) { if (FD->getDeclName().getCXXOverloadedOperator() == OO_New || FD->getDeclName().getCXXOverloadedOperator() == OO_Array_New) { LValue Ptr; if (!HandleOperatorNewCall(Info, E, Ptr)) return false; Ptr.moveInto(Result); return true; } else { return HandleOperatorDeleteCall(Info, E); } } } else return Error(E); SmallVector CovariantAdjustmentPath; if (This) { auto *NamedMember = dyn_cast(FD); if (NamedMember && NamedMember->isVirtual() && !HasQualifier) { // Perform virtual dispatch, if necessary. 
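        // For example (illustrative, C++20):
        //
        //   struct Base { virtual constexpr int get() const { return 1; } };
        //   struct Derived : Base {
        //     constexpr int get() const override { return 2; }
        //   };
        //   constexpr Derived d;
        //   constexpr const Base &b = d;
        //   static_assert(b.get() == 2);   // dispatches on the dynamic type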
FD = HandleVirtualDispatch(Info, E, *This, NamedMember, CovariantAdjustmentPath); if (!FD) return false; } else { // Check that the 'this' pointer points to an object of the right type. // FIXME: If this is an assignment operator call, we may need to change // the active union member before we check this. if (!checkNonVirtualMemberCallThisPointer(Info, E, *This, NamedMember)) return false; } } // Destructor calls are different enough that they have their own codepath. if (auto *DD = dyn_cast(FD)) { assert(This && "no 'this' pointer for destructor call"); return HandleDestruction(Info, E, *This, Info.Ctx.getRecordType(DD->getParent())); } const FunctionDecl *Definition = nullptr; Stmt *Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) || !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info, Result, ResultSlot)) return false; if (!CovariantAdjustmentPath.empty() && !HandleCovariantReturnAdjustment(Info, E, Result, CovariantAdjustmentPath)) return false; return true; } bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { return StmtVisitorTy::Visit(E->getInitializer()); } bool VisitInitListExpr(const InitListExpr *E) { if (E->getNumInits() == 0) return DerivedZeroInitialization(E); if (E->getNumInits() == 1) return StmtVisitorTy::Visit(E->getInit(0)); return Error(E); } bool VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { return DerivedZeroInitialization(E); } bool VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) { return DerivedZeroInitialization(E); } bool VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E) { return DerivedZeroInitialization(E); } /// A member expression where the object is a prvalue is itself a prvalue. bool VisitMemberExpr(const MemberExpr *E) { assert(!Info.Ctx.getLangOpts().CPlusPlus11 && "missing temporary materialization conversion"); assert(!E->isArrow() && "missing call to bound member function?"); APValue Val; if (!Evaluate(Val, Info, E->getBase())) return false; QualType BaseTy = E->getBase()->getType(); const FieldDecl *FD = dyn_cast(E->getMemberDecl()); if (!FD) return Error(E); assert(!FD->getType()->isReferenceType() && "prvalue reference?"); assert(BaseTy->castAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); // Note: there is no lvalue base here. But this case should only ever // happen in C or in C++98, where we cannot be evaluating a constexpr // constructor, which is the only case the base matters. CompleteObject Obj(APValue::LValueBase(), &Val, BaseTy); SubobjectDesignator Designator(BaseTy); Designator.addDeclUnchecked(FD); APValue Result; return extractSubobject(Info, E, Obj, Designator, Result) && DerivedSuccess(Result, E); } bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E) { APValue Val; if (!Evaluate(Val, Info, E->getBase())) return false; if (Val.isVector()) { SmallVector Indices; E->getEncodedElementAccess(Indices); if (Indices.size() == 1) { // Return scalar. return DerivedSuccess(Val.getVectorElt(Indices[0]), E); } else { // Construct new APValue vector. 
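        // For example (illustrative, using Clang's vector extension): given
        //
        //   typedef float float4 __attribute__((ext_vector_type(4)));
        //
        // a swizzle such as 'v.yx' on a constant-folded float4 'v' is
        // evaluated by collecting elements 1 and 0 into a new two-element
        // vector APValue.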
SmallVector Elts; for (unsigned I = 0; I < Indices.size(); ++I) { Elts.push_back(Val.getVectorElt(Indices[I])); } APValue VecResult(Elts.data(), Indices.size()); return DerivedSuccess(VecResult, E); } } return false; } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: break; case CK_AtomicToNonAtomic: { APValue AtomicVal; // This does not need to be done in place even for class/array types: // atomic-to-non-atomic conversion implies copying the object // representation. if (!Evaluate(AtomicVal, Info, E->getSubExpr())) return false; return DerivedSuccess(AtomicVal, E); } case CK_NoOp: case CK_UserDefinedConversion: return StmtVisitorTy::Visit(E->getSubExpr()); case CK_LValueToRValue: { LValue LVal; if (!EvaluateLValue(E->getSubExpr(), LVal, Info)) return false; APValue RVal; // Note, we use the subexpression's type in order to retain cv-qualifiers. if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(), LVal, RVal)) return false; return DerivedSuccess(RVal, E); } case CK_LValueToRValueBitCast: { APValue DestValue, SourceValue; if (!Evaluate(SourceValue, Info, E->getSubExpr())) return false; if (!handleLValueToRValueBitCast(Info, DestValue, SourceValue, E)) return false; return DerivedSuccess(DestValue, E); } case CK_AddressSpaceConversion: { APValue Value; if (!Evaluate(Value, Info, E->getSubExpr())) return false; return DerivedSuccess(Value, E); } } return Error(E); } bool VisitUnaryPostInc(const UnaryOperator *UO) { return VisitUnaryPostIncDec(UO); } bool VisitUnaryPostDec(const UnaryOperator *UO) { return VisitUnaryPostIncDec(UO); } bool VisitUnaryPostIncDec(const UnaryOperator *UO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(UO); LValue LVal; if (!EvaluateLValue(UO->getSubExpr(), LVal, Info)) return false; APValue RVal; if (!handleIncDec(this->Info, UO, LVal, UO->getSubExpr()->getType(), UO->isIncrementOp(), &RVal)) return false; return DerivedSuccess(RVal, UO); } bool VisitStmtExpr(const StmtExpr *E) { // We will have checked the full-expressions inside the statement expression // when they were completed, and don't need to check them again now. if (Info.checkingForUndefinedBehavior()) return Error(E); const CompoundStmt *CS = E->getSubStmt(); if (CS->body_empty()) return true; BlockScopeRAII Scope(Info); for (CompoundStmt::const_body_iterator BI = CS->body_begin(), BE = CS->body_end(); /**/; ++BI) { if (BI + 1 == BE) { const Expr *FinalExpr = dyn_cast(*BI); if (!FinalExpr) { Info.FFDiag((*BI)->getBeginLoc(), diag::note_constexpr_stmt_expr_unsupported); return false; } return this->Visit(FinalExpr) && Scope.destroy(); } APValue ReturnValue; StmtResult Result = { ReturnValue, nullptr }; EvalStmtResult ESR = EvaluateStmt(Result, Info, *BI); if (ESR != ESR_Succeeded) { // FIXME: If the statement-expression terminated due to 'return', // 'break', or 'continue', it would be nice to propagate that to // the outer statement evaluation rather than bailing out. if (ESR != ESR_Failed) Info.FFDiag((*BI)->getBeginLoc(), diag::note_constexpr_stmt_expr_unsupported); return false; } } llvm_unreachable("Return from function from the loop above."); } /// Visit a value which is evaluated, but whose value is ignored. void VisitIgnoredValue(const Expr *E) { EvaluateIgnoredValue(Info, E); } /// Potentially visit a MemberExpr's base expression. void VisitIgnoredBaseExpression(const Expr *E) { // While MSVC doesn't evaluate the base expression, it does diagnose the // presence of side-effecting behavior. 
if (Info.getLangOpts().MSVCCompat && !E->HasSideEffects(Info.Ctx)) return; VisitIgnoredValue(E); } }; } // namespace //===----------------------------------------------------------------------===// // Common base class for lvalue and temporary evaluation. //===----------------------------------------------------------------------===// namespace { template class LValueExprEvaluatorBase : public ExprEvaluatorBase { protected: LValue &Result; bool InvalidBaseOK; typedef LValueExprEvaluatorBase LValueExprEvaluatorBaseTy; typedef ExprEvaluatorBase ExprEvaluatorBaseTy; bool Success(APValue::LValueBase B) { Result.set(B); return true; } bool evaluatePointer(const Expr *E, LValue &Result) { return EvaluatePointer(E, Result, this->Info, InvalidBaseOK); } public: LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) : ExprEvaluatorBaseTy(Info), Result(Result), InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(this->Info.Ctx, V); return true; } bool VisitMemberExpr(const MemberExpr *E) { // Handle non-static data members. QualType BaseTy; bool EvalOK; if (E->isArrow()) { EvalOK = evaluatePointer(E->getBase(), Result); BaseTy = E->getBase()->getType()->castAs()->getPointeeType(); } else if (E->getBase()->isRValue()) { assert(E->getBase()->getType()->isRecordType()); EvalOK = EvaluateTemporary(E->getBase(), Result, this->Info); BaseTy = E->getBase()->getType(); } else { EvalOK = this->Visit(E->getBase()); BaseTy = E->getBase()->getType(); } if (!EvalOK) { if (!InvalidBaseOK) return false; Result.setInvalid(E); return true; } const ValueDecl *MD = E->getMemberDecl(); if (const FieldDecl *FD = dyn_cast(E->getMemberDecl())) { assert(BaseTy->castAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); (void)BaseTy; if (!HandleLValueMember(this->Info, E, Result, FD)) return false; } else if (const IndirectFieldDecl *IFD = dyn_cast(MD)) { if (!HandleLValueIndirectMember(this->Info, E, Result, IFD)) return false; } else return this->Error(E); if (MD->getType()->isReferenceType()) { APValue RefValue; if (!handleLValueToRValueConversion(this->Info, E, MD->getType(), Result, RefValue)) return false; return Success(RefValue, E); } return true; } bool VisitBinaryOperator(const BinaryOperator *E) { switch (E->getOpcode()) { default: return ExprEvaluatorBaseTy::VisitBinaryOperator(E); case BO_PtrMemD: case BO_PtrMemI: return HandleMemberPointerAccess(this->Info, E, Result); } } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_DerivedToBase: case CK_UncheckedDerivedToBase: if (!this->Visit(E->getSubExpr())) return false; // Now figure out the necessary offset to add to the base LV to get from // the derived class to the base class. return HandleLValueBasePath(this->Info, E, E->getSubExpr()->getType(), Result); } } }; } //===----------------------------------------------------------------------===// // LValue Evaluation // // This is used for evaluating lvalues (in C and C++), xvalues (in C++11), // function designators (in C), decl references to void objects (in C), and // temporaries (if building with -Wno-address-of-temporary). 
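// For instance (illustrative, not from this change), folding the C initializer
//   static int arr[3];
//   int *p = &arr[1];
// evaluates '&arr[1]' to an lvalue whose base is the VarDecl 'arr' and whose
// offset is one int (e.g. 4 bytes), in the representation described below.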
// // LValue evaluation produces values comprising a base expression of one of the // following types: // - Declarations // * VarDecl // * FunctionDecl // - Literals // * CompoundLiteralExpr in C (and in global scope in C++) // * StringLiteral // * PredefinedExpr // * ObjCStringLiteralExpr // * ObjCEncodeExpr // * AddrLabelExpr // * BlockExpr // * CallExpr for a MakeStringConstant builtin // - typeid(T) expressions, as TypeInfoLValues // - Locals and temporaries // * MaterializeTemporaryExpr // * Any Expr, with a CallIndex indicating the function in which the temporary // was evaluated, for cases where the MaterializeTemporaryExpr is missing // from the AST (FIXME). // * A MaterializeTemporaryExpr that has static storage duration, with no // CallIndex, for a lifetime-extended temporary. // * The ConstantExpr that is currently being evaluated during evaluation of an // immediate invocation. // plus an offset in bytes. //===----------------------------------------------------------------------===// namespace { class LValueExprEvaluator : public LValueExprEvaluatorBase { public: LValueExprEvaluator(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) : LValueExprEvaluatorBaseTy(Info, Result, InvalidBaseOK) {} bool VisitVarDecl(const Expr *E, const VarDecl *VD); bool VisitUnaryPreIncDec(const UnaryOperator *UO); bool VisitDeclRefExpr(const DeclRefExpr *E); bool VisitPredefinedExpr(const PredefinedExpr *E) { return Success(E); } bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); bool VisitMemberExpr(const MemberExpr *E); bool VisitStringLiteral(const StringLiteral *E) { return Success(E); } bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); } bool VisitCXXTypeidExpr(const CXXTypeidExpr *E); bool VisitCXXUuidofExpr(const CXXUuidofExpr *E); bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E); bool VisitUnaryDeref(const UnaryOperator *E); bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); bool VisitUnaryPreInc(const UnaryOperator *UO) { return VisitUnaryPreIncDec(UO); } bool VisitUnaryPreDec(const UnaryOperator *UO) { return VisitUnaryPreIncDec(UO); } bool VisitBinAssign(const BinaryOperator *BO); bool VisitCompoundAssignOperator(const CompoundAssignOperator *CAO); bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return LValueExprEvaluatorBaseTy::VisitCastExpr(E); case CK_LValueBitCast: this->CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; if (!Visit(E->getSubExpr())) return false; Result.Designator.setInvalid(); return true; case CK_BaseToDerived: if (!Visit(E->getSubExpr())) return false; return HandleBaseToDerivedCast(Info, E, Result); case CK_Dynamic: if (!Visit(E->getSubExpr())) return false; return HandleDynamicCast(Info, cast(E), Result); } } }; } // end anonymous namespace /// Evaluate an expression as an lvalue. 
This can be legitimately called on /// expressions which are not glvalues, in three cases: /// * function designators in C, and /// * "extern void" objects /// * @selector() expressions in Objective-C static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info, bool InvalidBaseOK) { assert(E->isGLValue() || E->getType()->isFunctionType() || E->getType()->isVoidType() || isa(E)); return LValueExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) { if (const FunctionDecl *FD = dyn_cast(E->getDecl())) return Success(FD); if (const VarDecl *VD = dyn_cast(E->getDecl())) return VisitVarDecl(E, VD); if (const BindingDecl *BD = dyn_cast(E->getDecl())) return Visit(BD->getBinding()); if (const MSGuidDecl *GD = dyn_cast(E->getDecl())) return Success(GD); return Error(E); } bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) { // If we are within a lambda's call operator, check whether the 'VD' referred // to within 'E' actually represents a lambda-capture that maps to a // data-member/field within the closure object, and if so, evaluate to the // field or what the field refers to. if (Info.CurrentCall && isLambdaCallOperator(Info.CurrentCall->Callee) && isa(E) && cast(E)->refersToEnclosingVariableOrCapture()) { // We don't always have a complete capture-map when checking or inferring if // the function call operator meets the requirements of a constexpr function // - but we don't need to evaluate the captures to determine constexprness // (dcl.constexpr C++17). if (Info.checkingPotentialConstantExpression()) return false; if (auto *FD = Info.CurrentCall->LambdaCaptureFields.lookup(VD)) { // Start with 'Result' referring to the complete closure object... Result = *Info.CurrentCall->This; // ... then update it to refer to the field of the closure object // that represents the capture. if (!HandleLValueMember(Info, E, Result, FD)) return false; // And if the field is of reference type, update 'Result' to refer to what // the field refers to. if (FD->getType()->isReferenceType()) { APValue RVal; if (!handleLValueToRValueConversion(Info, E, FD->getType(), Result, RVal)) return false; Result.setFrom(Info.Ctx, RVal); } return true; } } CallStackFrame *Frame = nullptr; if (VD->hasLocalStorage() && Info.CurrentCall->Index > 1) { // Only if a local variable was declared in the function currently being // evaluated, do we expect to be able to find its value in the current // frame. (Otherwise it was likely declared in an enclosing context and // could either have a valid evaluatable value (for e.g. a constexpr // variable) or be ill-formed (and trigger an appropriate evaluation // diagnostic)). if (Info.CurrentCall->Callee && Info.CurrentCall->Callee->Equals(VD->getDeclContext())) { Frame = Info.CurrentCall; } } if (!VD->getType()->isReferenceType()) { if (Frame) { Result.set({VD, Frame->Index, Info.CurrentCall->getCurrentTemporaryVersion(VD)}); return true; } return Success(VD); } APValue *V; if (!evaluateVarDeclInit(Info, E, VD, Frame, V, nullptr)) return false; if (!V->hasValue()) { // FIXME: Is it possible for V to be indeterminate here? If so, we should // adjust the diagnostic to say that. if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_constexpr_use_uninit_reference); return false; } return Success(*V, E); } bool LValueExprEvaluator::VisitMaterializeTemporaryExpr( const MaterializeTemporaryExpr *E) { // Walk through the expression to find the materialized temporary itself. 
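  // Illustrative example (editorial, not from this change): for a declaration
  // at namespace scope such as
  //   constexpr const int &r = 1 + 1;
  // the temporary holding 2 is lifetime-extended and has static storage
  // duration, so its value is stored on the MaterializeTemporaryExpr itself
  // (the SD_Static case below) rather than in the current call frame.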
SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); // If we passed any comma operators, evaluate their LHSs. for (unsigned I = 0, N = CommaLHSs.size(); I != N; ++I) if (!EvaluateIgnoredValue(Info, CommaLHSs[I])) return false; // A materialized temporary with static storage duration can appear within the // result of a constant expression evaluation, so we need to preserve its // value for use outside this evaluation. APValue *Value; if (E->getStorageDuration() == SD_Static) { Value = E->getOrCreateValue(true); *Value = APValue(); Result.set(E); } else { Value = &Info.CurrentCall->createTemporary( E, E->getType(), E->getStorageDuration() == SD_Automatic, Result); } QualType Type = Inner->getType(); // Materialize the temporary itself. if (!EvaluateInPlace(*Value, Info, Result, Inner)) { *Value = APValue(); return false; } // Adjust our lvalue to refer to the desired subobject. for (unsigned I = Adjustments.size(); I != 0; /**/) { --I; switch (Adjustments[I].Kind) { case SubobjectAdjustment::DerivedToBaseAdjustment: if (!HandleLValueBasePath(Info, Adjustments[I].DerivedToBase.BasePath, Type, Result)) return false; Type = Adjustments[I].DerivedToBase.BasePath->getType(); break; case SubobjectAdjustment::FieldAdjustment: if (!HandleLValueMember(Info, E, Result, Adjustments[I].Field)) return false; Type = Adjustments[I].Field->getType(); break; case SubobjectAdjustment::MemberPointerAdjustment: if (!HandleMemberPointerAccess(this->Info, Type, Result, Adjustments[I].Ptr.RHS)) return false; Type = Adjustments[I].Ptr.MPT->getPointeeType(); break; } } return true; } bool LValueExprEvaluator::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { assert((!Info.getLangOpts().CPlusPlus || E->isFileScope()) && "lvalue compound literal in c++?"); // Defer visiting the literal until the lvalue-to-rvalue conversion. We can // only see this when folding in C, so there's no standard to follow here. return Success(E); } bool LValueExprEvaluator::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { TypeInfoLValue TypeInfo; if (!E->isPotentiallyEvaluated()) { if (E->isTypeOperand()) TypeInfo = TypeInfoLValue(E->getTypeOperand(Info.Ctx).getTypePtr()); else TypeInfo = TypeInfoLValue(E->getExprOperand()->getType().getTypePtr()); } else { if (!Info.Ctx.getLangOpts().CPlusPlus20) { Info.CCEDiag(E, diag::note_constexpr_typeid_polymorphic) << E->getExprOperand()->getType() << E->getExprOperand()->getSourceRange(); } if (!Visit(E->getExprOperand())) return false; Optional DynType = ComputeDynamicType(Info, E, Result, AK_TypeId); if (!DynType) return false; TypeInfo = TypeInfoLValue(Info.Ctx.getRecordType(DynType->Type).getTypePtr()); } return Success(APValue::LValueBase::getTypeInfo(TypeInfo, E->getType())); } bool LValueExprEvaluator::VisitCXXUuidofExpr(const CXXUuidofExpr *E) { return Success(E->getGuidDecl()); } bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) { // Handle static data members. if (const VarDecl *VD = dyn_cast(E->getMemberDecl())) { VisitIgnoredBaseExpression(E->getBase()); return VisitVarDecl(E, VD); } // Handle static member functions. if (const CXXMethodDecl *MD = dyn_cast(E->getMemberDecl())) { if (MD->isStatic()) { VisitIgnoredBaseExpression(E->getBase()); return Success(MD); } } // Handle non-static data members. 
return LValueExprEvaluatorBaseTy::VisitMemberExpr(E); } bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { // FIXME: Deal with vectors as array subscript bases. if (E->getBase()->getType()->isVectorType()) return Error(E); bool Success = true; if (!evaluatePointer(E->getBase(), Result)) { if (!Info.noteFailure()) return false; Success = false; } APSInt Index; if (!EvaluateInteger(E->getIdx(), Index, Info)) return false; return Success && HandleLValueArrayAdjustment(Info, E, Result, E->getType(), Index); } bool LValueExprEvaluator::VisitUnaryDeref(const UnaryOperator *E) { return evaluatePointer(E->getSubExpr(), Result); } bool LValueExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (!Visit(E->getSubExpr())) return false; // __real is a no-op on scalar lvalues. if (E->getSubExpr()->getType()->isAnyComplexType()) HandleLValueComplexElement(Info, E, Result, E->getType(), false); return true; } bool LValueExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { assert(E->getSubExpr()->getType()->isAnyComplexType() && "lvalue __imag__ on scalar?"); if (!Visit(E->getSubExpr())) return false; HandleLValueComplexElement(Info, E, Result, E->getType(), true); return true; } bool LValueExprEvaluator::VisitUnaryPreIncDec(const UnaryOperator *UO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(UO); if (!this->Visit(UO->getSubExpr())) return false; return handleIncDec( this->Info, UO, Result, UO->getSubExpr()->getType(), UO->isIncrementOp(), nullptr); } bool LValueExprEvaluator::VisitCompoundAssignOperator( const CompoundAssignOperator *CAO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(CAO); APValue RHS; // The overall lvalue result is the result of evaluating the LHS. if (!this->Visit(CAO->getLHS())) { if (Info.noteFailure()) Evaluate(RHS, this->Info, CAO->getRHS()); return false; } if (!Evaluate(RHS, this->Info, CAO->getRHS())) return false; return handleCompoundAssignment( this->Info, CAO, Result, CAO->getLHS()->getType(), CAO->getComputationLHSType(), CAO->getOpForCompoundAssignment(CAO->getOpcode()), RHS); } bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(E); APValue NewVal; if (!this->Visit(E->getLHS())) { if (Info.noteFailure()) Evaluate(NewVal, this->Info, E->getRHS()); return false; } if (!Evaluate(NewVal, this->Info, E->getRHS())) return false; if (Info.getLangOpts().CPlusPlus20 && !HandleUnionActiveMemberChange(Info, E->getLHS(), Result)) return false; return handleAssignment(this->Info, E, Result, E->getLHS()->getType(), NewVal); } //===----------------------------------------------------------------------===// // Pointer Evaluation //===----------------------------------------------------------------------===// /// Attempts to compute the number of bytes available at the pointer /// returned by a function with the alloc_size attribute. Returns true if we /// were successful. Places an unsigned number into `Result`. /// /// This expects the given CallExpr to be a call to a function with an /// alloc_size attribute. 
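///
/// For example (illustrative only), given hypothetical declarations
///   void *my_malloc(size_t n) __attribute__((alloc_size(1)));
///   void *my_calloc(size_t n, size_t m) __attribute__((alloc_size(1, 2)));
/// a call 'my_malloc(32)' is treated as returning 32 bytes, and
/// 'my_calloc(4, 8)' as returning 4 * 8 = 32 bytes (the two-parameter form
/// multiplies the element count by the element size below).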
static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx, const CallExpr *Call, llvm::APInt &Result) { const AllocSizeAttr *AllocSize = getAllocSizeAttr(Call); assert(AllocSize && AllocSize->getElemSizeParam().isValid()); unsigned SizeArgNo = AllocSize->getElemSizeParam().getASTIndex(); unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType()); if (Call->getNumArgs() <= SizeArgNo) return false; auto EvaluateAsSizeT = [&](const Expr *E, APSInt &Into) { Expr::EvalResult ExprResult; if (!E->EvaluateAsInt(ExprResult, Ctx, Expr::SE_AllowSideEffects)) return false; Into = ExprResult.Val.getInt(); if (Into.isNegative() || !Into.isIntN(BitsInSizeT)) return false; Into = Into.zextOrSelf(BitsInSizeT); return true; }; APSInt SizeOfElem; if (!EvaluateAsSizeT(Call->getArg(SizeArgNo), SizeOfElem)) return false; if (!AllocSize->getNumElemsParam().isValid()) { Result = std::move(SizeOfElem); return true; } APSInt NumberOfElems; unsigned NumArgNo = AllocSize->getNumElemsParam().getASTIndex(); if (!EvaluateAsSizeT(Call->getArg(NumArgNo), NumberOfElems)) return false; bool Overflow; llvm::APInt BytesAvailable = SizeOfElem.umul_ov(NumberOfElems, Overflow); if (Overflow) return false; Result = std::move(BytesAvailable); return true; } /// Convenience function. LVal's base must be a call to an alloc_size /// function. static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx, const LValue &LVal, llvm::APInt &Result) { assert(isBaseAnAllocSizeCall(LVal.getLValueBase()) && "Can't get the size of a non alloc_size function"); const auto *Base = LVal.getLValueBase().get(); const CallExpr *CE = tryUnwrapAllocSizeCall(Base); return getBytesReturnedByAllocSizeCall(Ctx, CE, Result); } /// Attempts to evaluate the given LValueBase as the result of a call to /// a function with the alloc_size attribute. If it was possible to do so, this /// function will return true, make Result's Base point to said function call, /// and mark Result's Base as invalid. static bool evaluateLValueAsAllocSize(EvalInfo &Info, APValue::LValueBase Base, LValue &Result) { if (Base.isNull()) return false; // Because we do no form of static analysis, we only support const variables. // // Additionally, we can't support parameters, nor can we support static // variables (in the latter case, use-before-assign isn't UB; in the former, // we have no clue what they'll be assigned to). const auto *VD = dyn_cast_or_null(Base.dyn_cast()); if (!VD || !VD->isLocalVarDecl() || !VD->getType().isConstQualified()) return false; const Expr *Init = VD->getAnyInitializer(); if (!Init) return false; const Expr *E = Init->IgnoreParens(); if (!tryUnwrapAllocSizeCall(E)) return false; // Store E instead of E unwrapped so that the type of the LValue's base is // what the user wanted. 
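  // Illustrative example (editorial): inside a function, given a hypothetical
  // alloc_size function 'my_malloc',
  //   void *const p = my_malloc(16);
  //   unsigned n = __builtin_object_size(p, 0);
  // 'n' can fold to 16: the lvalue-to-rvalue conversion of 'p' fails, but the
  // const local's initializer is recognized as an alloc_size call here.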
Result.setInvalid(E); QualType Pointee = E->getType()->castAs()->getPointeeType(); Result.addUnsizedArray(Info, E, Pointee); return true; } namespace { class PointerExprEvaluator : public ExprEvaluatorBase { LValue &Result; bool InvalidBaseOK; bool Success(const Expr *E) { Result.set(E); return true; } bool evaluateLValue(const Expr *E, LValue &Result) { return EvaluateLValue(E, Result, Info, InvalidBaseOK); } bool evaluatePointer(const Expr *E, LValue &Result) { return EvaluatePointer(E, Result, Info, InvalidBaseOK); } bool visitNonBuiltinCallExpr(const CallExpr *E); public: PointerExprEvaluator(EvalInfo &info, LValue &Result, bool InvalidBaseOK) : ExprEvaluatorBaseTy(info), Result(Result), InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(Info.Ctx, V); return true; } bool ZeroInitialization(const Expr *E) { Result.setNull(Info.Ctx, E->getType()); return true; } bool VisitBinaryOperator(const BinaryOperator *E); bool VisitCastExpr(const CastExpr* E); bool VisitUnaryAddrOf(const UnaryOperator *E); bool VisitObjCStringLiteral(const ObjCStringLiteral *E) { return Success(E); } bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { if (E->isExpressibleAsConstantInitializer()) return Success(E); if (Info.noteFailure()) EvaluateIgnoredValue(Info, E->getSubExpr()); return Error(E); } bool VisitAddrLabelExpr(const AddrLabelExpr *E) { return Success(E); } bool VisitCallExpr(const CallExpr *E); bool VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp); bool VisitBlockExpr(const BlockExpr *E) { if (!E->getBlockDecl()->hasCaptures()) return Success(E); return Error(E); } bool VisitCXXThisExpr(const CXXThisExpr *E) { // Can't look at 'this' when checking a potential constant expression. if (Info.checkingPotentialConstantExpression()) return false; if (!Info.CurrentCall->This) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_this) << E->isImplicit(); else Info.FFDiag(E); return false; } Result = *Info.CurrentCall->This; // If we are inside a lambda's call operator, the 'this' expression refers // to the enclosing '*this' object (either by value or reference) which is // either copied into the closure object's field that represents the '*this' // or refers to '*this'. if (isLambdaCallOperator(Info.CurrentCall->Callee)) { // Ensure we actually have captured 'this'. (an error will have // been previously reported if not). if (!Info.CurrentCall->LambdaThisCaptureField) return false; // Update 'Result' to refer to the data member/field of the closure object // that represents the '*this' capture. if (!HandleLValueMember(Info, E, Result, Info.CurrentCall->LambdaThisCaptureField)) return false; // If we captured '*this' by reference, replace the field with its referent. 
if (Info.CurrentCall->LambdaThisCaptureField->getType() ->isPointerType()) { APValue RVal; if (!handleLValueToRValueConversion(Info, E, E->getType(), Result, RVal)) return false; Result.setFrom(Info.Ctx, RVal); } } return true; } bool VisitCXXNewExpr(const CXXNewExpr *E); bool VisitSourceLocExpr(const SourceLocExpr *E) { assert(E->isStringType() && "SourceLocExpr isn't a pointer type?"); APValue LValResult = E->EvaluateInContext( Info.Ctx, Info.CurrentCall->CurSourceLocExprScope.getDefaultExpr()); Result.setFrom(Info.Ctx, LValResult); return true; } // FIXME: Missing: @protocol, @selector }; } // end anonymous namespace static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info, bool InvalidBaseOK) { assert(E->isRValue() && E->getType()->hasPointerRepresentation()); return PointerExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->getOpcode() != BO_Add && E->getOpcode() != BO_Sub) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); const Expr *PExp = E->getLHS(); const Expr *IExp = E->getRHS(); if (IExp->getType()->isPointerType()) std::swap(PExp, IExp); bool EvalPtrOK = evaluatePointer(PExp, Result); if (!EvalPtrOK && !Info.noteFailure()) return false; llvm::APSInt Offset; if (!EvaluateInteger(IExp, Offset, Info) || !EvalPtrOK) return false; if (E->getOpcode() == BO_Sub) negateAsSigned(Offset); QualType Pointee = PExp->getType()->castAs()->getPointeeType(); return HandleLValueArrayAdjustment(Info, E, Result, Pointee, Offset); } bool PointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) { return evaluateLValue(E->getSubExpr(), Result); } bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr *SubExpr = E->getSubExpr(); switch (E->getCastKind()) { default: break; case CK_BitCast: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_AddressSpaceConversion: if (!Visit(SubExpr)) return false; // Bitcasts to cv void* are static_casts, not reinterpret_casts, so are // permitted in constant expressions in C++11. Bitcasts from cv void* are // also static_casts, but we disallow them as a resolution to DR1312. if (!E->getType()->isVoidPointerType()) { if (!Result.InvalidBase && !Result.Designator.Invalid && !Result.IsNullPtr && Info.Ctx.hasSameUnqualifiedType(Result.Designator.getType(Info.Ctx), E->getType()->getPointeeType()) && Info.getStdAllocatorCaller("allocate")) { // Inside a call to std::allocator::allocate and friends, we permit // casting from void* back to cv1 T* for a pointer that points to a // cv2 T. } else { Result.Designator.setInvalid(); if (SubExpr->getType()->isVoidPointerType()) CCEDiag(E, diag::note_constexpr_invalid_cast) << 3 << SubExpr->getType(); else CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; } } if (E->getCastKind() == CK_AddressSpaceConversion && Result.IsNullPtr) ZeroInitialization(E); return true; case CK_DerivedToBase: case CK_UncheckedDerivedToBase: if (!evaluatePointer(E->getSubExpr(), Result)) return false; if (!Result.Base && Result.Offset.isZero()) return true; // Now figure out the necessary offset to add to the base LV to get from // the derived class to the base class. 
return HandleLValueBasePath(Info, E, E->getSubExpr()->getType()-> castAs()->getPointeeType(), Result); case CK_BaseToDerived: if (!Visit(E->getSubExpr())) return false; if (!Result.Base && Result.Offset.isZero()) return true; return HandleBaseToDerivedCast(Info, E, Result); case CK_Dynamic: if (!Visit(E->getSubExpr())) return false; return HandleDynamicCast(Info, cast(E), Result); case CK_NullToPointer: VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); case CK_IntegralToPointer: { CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; APValue Value; if (!EvaluateIntegerOrLValue(SubExpr, Value, Info)) break; if (Value.isInt()) { unsigned Size = Info.Ctx.getTypeSize(E->getType()); uint64_t N = Value.getInt().extOrTrunc(Size).getZExtValue(); Result.Base = (Expr*)nullptr; Result.InvalidBase = false; Result.Offset = CharUnits::fromQuantity(N); Result.Designator.setInvalid(); Result.IsNullPtr = false; return true; } else { // Cast is of an lvalue, no need to change value. Result.setFrom(Info.Ctx, Value); return true; } } case CK_ArrayToPointerDecay: { if (SubExpr->isGLValue()) { if (!evaluateLValue(SubExpr, Result)) return false; } else { APValue &Value = Info.CurrentCall->createTemporary( SubExpr, SubExpr->getType(), false, Result); if (!EvaluateInPlace(Value, Info, Result, SubExpr)) return false; } // The result is a pointer to the first element of the array. auto *AT = Info.Ctx.getAsArrayType(SubExpr->getType()); if (auto *CAT = dyn_cast(AT)) Result.addArray(Info, E, CAT); else Result.addUnsizedArray(Info, E, AT->getElementType()); return true; } case CK_FunctionToPointerDecay: return evaluateLValue(SubExpr, Result); case CK_LValueToRValue: { LValue LVal; if (!evaluateLValue(E->getSubExpr(), LVal)) return false; APValue RVal; // Note, we use the subexpression's type in order to retain cv-qualifiers. if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(), LVal, RVal)) return InvalidBaseOK && evaluateLValueAsAllocSize(Info, LVal.Base, Result); return Success(RVal, E); } } return ExprEvaluatorBaseTy::VisitCastExpr(E); } static CharUnits GetAlignOfType(EvalInfo &Info, QualType T, UnaryExprOrTypeTrait ExprKind) { // C++ [expr.alignof]p3: // When alignof is applied to a reference type, the result is the // alignment of the referenced type. if (const ReferenceType *Ref = T->getAs()) T = Ref->getPointeeType(); if (T.getQualifiers().hasUnaligned()) return CharUnits::One(); const bool AlignOfReturnsPreferred = Info.Ctx.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver7; // __alignof is defined to return the preferred alignment. // Before 8, clang returned the preferred alignment for alignof and _Alignof // as well. if (ExprKind == UETT_PreferredAlignOf || AlignOfReturnsPreferred) return Info.Ctx.toCharUnitsFromBits( Info.Ctx.getPreferredTypeAlign(T.getTypePtr())); // alignof and _Alignof are defined to return the ABI alignment. else if (ExprKind == UETT_AlignOf) return Info.Ctx.getTypeAlignInChars(T.getTypePtr()); else llvm_unreachable("GetAlignOfType on a non-alignment ExprKind"); } static CharUnits GetAlignOfExpr(EvalInfo &Info, const Expr *E, UnaryExprOrTypeTrait ExprKind) { E = E->IgnoreParens(); // The kinds of expressions that we have special-case logic here for // should be kept up to date with the special checks for those // expressions in Sema. // alignof decl is always accepted, even if it doesn't make sense: we default // to 1 in those cases. 
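  // Illustrative note (editorial): the preferred/ABI distinction above is
  // observable on some targets; e.g. on 32-bit x86 Linux,
  //   _Alignof(double)  == 4   (ABI alignment)
  //   __alignof(double) == 8   (preferred alignment)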
if (const DeclRefExpr *DRE = dyn_cast(E)) return Info.Ctx.getDeclAlign(DRE->getDecl(), /*RefAsPointee*/true); if (const MemberExpr *ME = dyn_cast(E)) return Info.Ctx.getDeclAlign(ME->getMemberDecl(), /*RefAsPointee*/true); return GetAlignOfType(Info, E->getType(), ExprKind); } static CharUnits getBaseAlignment(EvalInfo &Info, const LValue &Value) { if (const auto *VD = Value.Base.dyn_cast()) return Info.Ctx.getDeclAlign(VD); if (const auto *E = Value.Base.dyn_cast()) return GetAlignOfExpr(Info, E, UETT_AlignOf); return GetAlignOfType(Info, Value.Base.getTypeInfoType(), UETT_AlignOf); } /// Evaluate the value of the alignment argument to __builtin_align_{up,down}, /// __builtin_is_aligned and __builtin_assume_aligned. static bool getAlignmentArgument(const Expr *E, QualType ForType, EvalInfo &Info, APSInt &Alignment) { if (!EvaluateInteger(E, Alignment, Info)) return false; if (Alignment < 0 || !Alignment.isPowerOf2()) { Info.FFDiag(E, diag::note_constexpr_invalid_alignment) << Alignment; return false; } unsigned SrcWidth = Info.Ctx.getIntWidth(ForType); APSInt MaxValue(APInt::getOneBitSet(SrcWidth, SrcWidth - 1)); if (APSInt::compareValues(Alignment, MaxValue) > 0) { Info.FFDiag(E, diag::note_constexpr_alignment_too_big) << MaxValue << ForType << Alignment; return false; } // Ensure both alignment and source value have the same bit width so that we // don't assert when computing the resulting value. APSInt ExtAlignment = APSInt(Alignment.zextOrTrunc(SrcWidth), /*isUnsigned=*/true); assert(APSInt::compareValues(Alignment, ExtAlignment) == 0 && "Alignment should not be changed by ext/trunc"); Alignment = ExtAlignment; assert(Alignment.getBitWidth() == SrcWidth); return true; } // To be clear: this happily visits unsupported builtins. Better name welcomed. bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) { if (ExprEvaluatorBaseTy::VisitCallExpr(E)) return true; if (!(InvalidBaseOK && getAllocSizeAttr(E))) return false; Result.setInvalid(E); QualType PointeeTy = E->getType()->castAs()->getPointeeType(); Result.addUnsizedArray(Info, E, PointeeTy); return true; } bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) { if (IsStringLiteralCall(E)) return Success(E); if (unsigned BuiltinOp = E->getBuiltinCallee()) return VisitBuiltinCallExpr(E, BuiltinOp); return visitNonBuiltinCallExpr(E); } // Determine if T is a character type for which we guarantee that // sizeof(T) == 1. static bool isOneByteCharacterType(QualType T) { return T->isCharType() || T->isChar8Type(); } bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { switch (BuiltinOp) { case Builtin::BI__builtin_addressof: return evaluateLValue(E->getArg(0), Result); case Builtin::BI__builtin_assume_aligned: { // We need to be very careful here because: if the pointer does not have the // asserted alignment, then the behavior is undefined, and undefined // behavior is non-constant. if (!evaluatePointer(E->getArg(0), Result)) return false; LValue OffsetResult(Result); APSInt Alignment; if (!getAlignmentArgument(E->getArg(1), E->getArg(0)->getType(), Info, Alignment)) return false; CharUnits Align = CharUnits::fromQuantity(Alignment.getZExtValue()); if (E->getNumArgs() > 2) { APSInt Offset; if (!EvaluateInteger(E->getArg(2), Offset, Info)) return false; int64_t AdditionalOffset = -Offset.getZExtValue(); OffsetResult.Offset += CharUnits::fromQuantity(AdditionalOffset); } // If there is a base object, then it must have the correct alignment. 
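  // Illustrative example (editorial): in a constant-evaluated context,
  //   alignas(4) static int x;
  //   (void)__builtin_assume_aligned(&x, 16);
  // fails to constant-evaluate here, because asserting 16-byte alignment for
  // a 4-byte-aligned base object would be undefined behavior.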
if (OffsetResult.Base) { CharUnits BaseAlignment = getBaseAlignment(Info, OffsetResult); if (BaseAlignment < Align) { Result.Designator.setInvalid(); // FIXME: Add support to Diagnostic for long / long long. CCEDiag(E->getArg(0), diag::note_constexpr_baa_insufficient_alignment) << 0 << (unsigned)BaseAlignment.getQuantity() << (unsigned)Align.getQuantity(); return false; } } // The offset must also have the correct alignment. if (OffsetResult.Offset.alignTo(Align) != OffsetResult.Offset) { Result.Designator.setInvalid(); (OffsetResult.Base ? CCEDiag(E->getArg(0), diag::note_constexpr_baa_insufficient_alignment) << 1 : CCEDiag(E->getArg(0), diag::note_constexpr_baa_value_insufficient_alignment)) << (int)OffsetResult.Offset.getQuantity() << (unsigned)Align.getQuantity(); return false; } return true; } case Builtin::BI__builtin_align_up: case Builtin::BI__builtin_align_down: { if (!evaluatePointer(E->getArg(0), Result)) return false; APSInt Alignment; if (!getAlignmentArgument(E->getArg(1), E->getArg(0)->getType(), Info, Alignment)) return false; CharUnits BaseAlignment = getBaseAlignment(Info, Result); CharUnits PtrAlign = BaseAlignment.alignmentAtOffset(Result.Offset); // For align_up/align_down, we can return the same value if the alignment // is known to be greater or equal to the requested value. if (PtrAlign.getQuantity() >= Alignment) return true; // The alignment could be greater than the minimum at run-time, so we cannot // infer much about the resulting pointer value. One case is possible: // For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we // can infer the correct index if the requested alignment is smaller than // the base alignment so we can perform the computation on the offset. if (BaseAlignment.getQuantity() >= Alignment) { assert(Alignment.getBitWidth() <= 64 && "Cannot handle > 64-bit address-space"); uint64_t Alignment64 = Alignment.getZExtValue(); CharUnits NewOffset = CharUnits::fromQuantity( BuiltinOp == Builtin::BI__builtin_align_down ? llvm::alignDown(Result.Offset.getQuantity(), Alignment64) : llvm::alignTo(Result.Offset.getQuantity(), Alignment64)); Result.adjustOffset(NewOffset - Result.Offset); // TODO: diagnose out-of-bounds values/only allow for arrays? return true; } // Otherwise, we cannot constant-evaluate the result. 
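  // Illustrative example (editorial) of the foldable case handled just above:
  //   _Alignas(32) char buf[64];
  //   __builtin_align_down(&buf[7], 16)   // folds to &buf[0]
  //   __builtin_align_up(&buf[7], 16)     // folds to &buf[16]
  // Because the base alignment (32) is at least the requested alignment (16),
  // the adjustment can be computed purely on the offset.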
Info.FFDiag(E->getArg(0), diag::note_constexpr_alignment_adjust) << Alignment; return false; } case Builtin::BI__builtin_operator_new: return HandleOperatorNewCall(Info, E, Result); case Builtin::BI__builtin_launder: return evaluatePointer(E->getArg(0), Result); case Builtin::BIstrchr: case Builtin::BIwcschr: case Builtin::BImemchr: case Builtin::BIwmemchr: if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); LLVM_FALLTHROUGH; case Builtin::BI__builtin_strchr: case Builtin::BI__builtin_wcschr: case Builtin::BI__builtin_memchr: case Builtin::BI__builtin_char_memchr: case Builtin::BI__builtin_wmemchr: { if (!Visit(E->getArg(0))) return false; APSInt Desired; if (!EvaluateInteger(E->getArg(1), Desired, Info)) return false; uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrchr && BuiltinOp != Builtin::BIwcschr && BuiltinOp != Builtin::BI__builtin_strchr && BuiltinOp != Builtin::BI__builtin_wcschr) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } // We cannot find the value if there are no candidates to match against. if (MaxLength == 0u) return ZeroInitialization(E); if (!Result.checkNullPointerForFoldAccess(Info, E, AK_Read) || Result.Designator.Invalid) return false; QualType CharTy = Result.Designator.getType(Info.Ctx); bool IsRawByte = BuiltinOp == Builtin::BImemchr || BuiltinOp == Builtin::BI__builtin_memchr; assert(IsRawByte || Info.Ctx.hasSameUnqualifiedType( CharTy, E->getArg(0)->getType()->getPointeeType())); // Pointers to const void may point to objects of incomplete type. if (IsRawByte && CharTy->isIncompleteType()) { Info.FFDiag(E, diag::note_constexpr_ltor_incomplete_type) << CharTy; return false; } // Give up on byte-oriented matching against multibyte elements. // FIXME: We can compare the bytes in the correct order. if (IsRawByte && !isOneByteCharacterType(CharTy)) { Info.FFDiag(E, diag::note_constexpr_memchr_unsupported) << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'") << CharTy; return false; } // Figure out what value we're actually looking for (after converting to // the corresponding unsigned type if necessary). uint64_t DesiredVal; bool StopAtNull = false; switch (BuiltinOp) { case Builtin::BIstrchr: case Builtin::BI__builtin_strchr: // strchr compares directly to the passed integer, and therefore // always fails if given an int that is not a char. if (!APSInt::isSameValue(HandleIntToIntCast(Info, E, CharTy, E->getArg(1)->getType(), Desired), Desired)) return ZeroInitialization(E); StopAtNull = true; LLVM_FALLTHROUGH; case Builtin::BImemchr: case Builtin::BI__builtin_memchr: case Builtin::BI__builtin_char_memchr: // memchr compares by converting both sides to unsigned char. That's also // correct for strchr if we get this far (to cope with plain char being // unsigned in the strchr case). DesiredVal = Desired.trunc(Info.Ctx.getCharWidth()).getZExtValue(); break; case Builtin::BIwcschr: case Builtin::BI__builtin_wcschr: StopAtNull = true; LLVM_FALLTHROUGH; case Builtin::BIwmemchr: case Builtin::BI__builtin_wmemchr: // wcschr and wmemchr are given a wchar_t to look for. Just use it. 
DesiredVal = Desired.getZExtValue(); break; } for (; MaxLength; --MaxLength) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, Result, Char) || !Char.isInt()) return false; if (Char.getInt().getZExtValue() == DesiredVal) return true; if (StopAtNull && !Char.getInt()) break; if (!HandleLValueArrayAdjustment(Info, E, Result, CharTy, 1)) return false; } // Not found: return nullptr. return ZeroInitialization(E); } case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIwmemcpy: case Builtin::BIwmemmove: if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); LLVM_FALLTHROUGH; case Builtin::BI__builtin_memcpy: case Builtin::BI__builtin_memmove: case Builtin::BI__builtin_wmemcpy: case Builtin::BI__builtin_wmemmove: { bool WChar = BuiltinOp == Builtin::BIwmemcpy || BuiltinOp == Builtin::BIwmemmove || BuiltinOp == Builtin::BI__builtin_wmemcpy || BuiltinOp == Builtin::BI__builtin_wmemmove; bool Move = BuiltinOp == Builtin::BImemmove || BuiltinOp == Builtin::BIwmemmove || BuiltinOp == Builtin::BI__builtin_memmove || BuiltinOp == Builtin::BI__builtin_wmemmove; // The result of mem* is the first argument. if (!Visit(E->getArg(0))) return false; LValue Dest = Result; LValue Src; if (!EvaluatePointer(E->getArg(1), Src, Info)) return false; APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; assert(!N.isSigned() && "memcpy and friends take an unsigned size"); // If the size is zero, we treat this as always being a valid no-op. // (Even if one of the src and dest pointers is null.) if (!N) return true; // Otherwise, if either of the operands is null, we can't proceed. Don't // try to determine the type of the copied objects, because there aren't // any. if (!Src.Base || !Dest.Base) { APValue Val; (!Src.Base ? Src : Dest).moveInto(Val); Info.FFDiag(E, diag::note_constexpr_memcpy_null) << Move << WChar << !!Src.Base << Val.getAsString(Info.Ctx, E->getArg(0)->getType()); return false; } if (Src.Designator.Invalid || Dest.Designator.Invalid) return false; // We require that Src and Dest are both pointers to arrays of // trivially-copyable type. (For the wide version, the designator will be // invalid if the designated object is not a wchar_t.) QualType T = Dest.Designator.getType(Info.Ctx); QualType SrcT = Src.Designator.getType(Info.Ctx); if (!Info.Ctx.hasSameUnqualifiedType(T, SrcT)) { // FIXME: Consider using our bit_cast implementation to support this. Info.FFDiag(E, diag::note_constexpr_memcpy_type_pun) << Move << SrcT << T; return false; } if (T->isIncompleteType()) { Info.FFDiag(E, diag::note_constexpr_memcpy_incomplete_type) << Move << T; return false; } if (!T.isTriviallyCopyableType(Info.Ctx)) { Info.FFDiag(E, diag::note_constexpr_memcpy_nontrivial) << Move << T; return false; } // Figure out how many T's we're copying. uint64_t TSize = Info.Ctx.getTypeSizeInChars(T).getQuantity(); if (!WChar) { uint64_t Remainder; llvm::APInt OrigN = N; llvm::APInt::udivrem(OrigN, TSize, N, Remainder); if (Remainder) { Info.FFDiag(E, diag::note_constexpr_memcpy_unsupported) << Move << WChar << 0 << T << OrigN.toString(10, /*Signed*/false) << (unsigned)TSize; return false; } } // Check that the copying will remain within the arrays, just so that we // can give a more meaningful diagnostic. This implicitly also checks that // N fits into 64 bits. 
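  // Illustrative example (editorial): within a constexpr function,
  //   int src[3] = {1, 2, 3}, dst[3] = {};
  //   __builtin_memcpy(dst, src, 3 * sizeof(int));
  // is evaluated element-wise below; a byte count that is not a multiple of
  // sizeof(int) (checked above), or that would run past either array (checked
  // just below), is rejected.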
uint64_t RemainingSrcSize = Src.Designator.validIndexAdjustments().second; uint64_t RemainingDestSize = Dest.Designator.validIndexAdjustments().second; if (N.ugt(RemainingSrcSize) || N.ugt(RemainingDestSize)) { Info.FFDiag(E, diag::note_constexpr_memcpy_unsupported) << Move << WChar << (N.ugt(RemainingSrcSize) ? 1 : 2) << T << N.toString(10, /*Signed*/false); return false; } uint64_t NElems = N.getZExtValue(); uint64_t NBytes = NElems * TSize; // Check for overlap. int Direction = 1; if (HasSameBase(Src, Dest)) { uint64_t SrcOffset = Src.getLValueOffset().getQuantity(); uint64_t DestOffset = Dest.getLValueOffset().getQuantity(); if (DestOffset >= SrcOffset && DestOffset - SrcOffset < NBytes) { // Dest is inside the source region. if (!Move) { Info.FFDiag(E, diag::note_constexpr_memcpy_overlap) << WChar; return false; } // For memmove and friends, copy backwards. if (!HandleLValueArrayAdjustment(Info, E, Src, T, NElems - 1) || !HandleLValueArrayAdjustment(Info, E, Dest, T, NElems - 1)) return false; Direction = -1; } else if (!Move && SrcOffset >= DestOffset && SrcOffset - DestOffset < NBytes) { // Src is inside the destination region for memcpy: invalid. Info.FFDiag(E, diag::note_constexpr_memcpy_overlap) << WChar; return false; } } while (true) { APValue Val; // FIXME: Set WantObjectRepresentation to true if we're copying a // char-like type? if (!handleLValueToRValueConversion(Info, E, T, Src, Val) || !handleAssignment(Info, E, Dest, T, Val)) return false; // Do not iterate past the last element; if we're copying backwards, that // might take us off the start of the array. if (--NElems == 0) return true; if (!HandleLValueArrayAdjustment(Info, E, Src, T, Direction) || !HandleLValueArrayAdjustment(Info, E, Dest, T, Direction)) return false; } } default: break; } return visitNonBuiltinCallExpr(E); } static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, APValue &Result, const InitListExpr *ILE, QualType AllocType); static bool EvaluateArrayNewConstructExpr(EvalInfo &Info, LValue &This, APValue &Result, const CXXConstructExpr *CCE, QualType AllocType); bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { if (!Info.getLangOpts().CPlusPlus20) Info.CCEDiag(E, diag::note_constexpr_new); // We cannot speculatively evaluate a delete expression. if (Info.SpeculativeEvaluationDepth) return false; FunctionDecl *OperatorNew = E->getOperatorNew(); bool IsNothrow = false; bool IsPlacement = false; if (OperatorNew->isReservedGlobalPlacementOperator() && Info.CurrentCall->isStdFunction() && !E->isArray()) { // FIXME Support array placement new. assert(E->getNumPlacementArgs() == 1); if (!EvaluatePointer(E->getPlacementArg(0), Result, Info)) return false; if (Result.Designator.Invalid) return false; IsPlacement = true; } else if (!OperatorNew->isReplaceableGlobalAllocationFunction()) { Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) << isa(OperatorNew) << OperatorNew; return false; } else if (E->getNumPlacementArgs()) { // The only new-placement list we support is of the form (std::nothrow). // // FIXME: There is no restriction on this, but it's not clear that any // other form makes any sense. We get here for cases such as: // // new (std::align_val_t{N}) X(int) // // (which should presumably be valid only if N is a multiple of // alignof(int), and in any case can't be deallocated unless N is // alignof(X) and X has new-extended alignment). 
    if (E->getNumPlacementArgs() != 1 ||
        !E->getPlacementArg(0)->getType()->isNothrowT())
      return Error(E, diag::note_constexpr_new_placement);

    LValue Nothrow;
    if (!EvaluateLValue(E->getPlacementArg(0), Nothrow, Info))
      return false;
    IsNothrow = true;
  }

  const Expr *Init = E->getInitializer();
  const InitListExpr *ResizedArrayILE = nullptr;
  const CXXConstructExpr *ResizedArrayCCE = nullptr;
+ bool ValueInit = false;

  QualType AllocType = E->getAllocatedType();
  if (Optional<const Expr *> ArraySize = E->getArraySize()) {
    const Expr *Stripped = *ArraySize;
    for (; auto *ICE = dyn_cast<ImplicitCastExpr>(Stripped);
         Stripped = ICE->getSubExpr())
      if (ICE->getCastKind() != CK_NoOp &&
          ICE->getCastKind() != CK_IntegralCast)
        break;

    llvm::APSInt ArrayBound;
    if (!EvaluateInteger(Stripped, ArrayBound, Info))
      return false;

    // C++ [expr.new]p9:
    //   The expression is erroneous if:
    //   -- [...] its value before converting to size_t [or] applying the
    //      second standard conversion sequence is less than zero
    if (ArrayBound.isSigned() && ArrayBound.isNegative()) {
      if (IsNothrow)
        return ZeroInitialization(E);

      Info.FFDiag(*ArraySize, diag::note_constexpr_new_negative)
          << ArrayBound << (*ArraySize)->getSourceRange();
      return false;
    }

    //   -- its value is such that the size of the allocated object would
    //      exceed the implementation-defined limit
    if (ConstantArrayType::getNumAddressingBits(Info.Ctx, AllocType,
                                                ArrayBound) >
        ConstantArrayType::getMaxSizeBits(Info.Ctx)) {
      if (IsNothrow)
        return ZeroInitialization(E);

      Info.FFDiag(*ArraySize, diag::note_constexpr_new_too_large)
          << ArrayBound << (*ArraySize)->getSourceRange();
      return false;
    }

    //   -- the new-initializer is a braced-init-list and the number of
    //      array elements for which initializers are provided [...]
    //      exceeds the number of elements to initialize
-   if (Init && !isa<CXXConstructExpr>(Init)) {
+   if (!Init) {
+     // No initialization is performed.
+   } else if (isa<CXXScalarValueInitExpr>(Init) ||
+              isa<ImplicitValueInitExpr>(Init)) {
+     ValueInit = true;
+   } else if (auto *CCE = dyn_cast<CXXConstructExpr>(Init)) {
+     ResizedArrayCCE = CCE;
+   } else {
      auto *CAT = Info.Ctx.getAsConstantArrayType(Init->getType());
      assert(CAT && "unexpected type for array initializer");

      unsigned Bits =
          std::max(CAT->getSize().getBitWidth(), ArrayBound.getBitWidth());
      llvm::APInt InitBound = CAT->getSize().zextOrSelf(Bits);
      llvm::APInt AllocBound = ArrayBound.zextOrSelf(Bits);
      if (InitBound.ugt(AllocBound)) {
        if (IsNothrow)
          return ZeroInitialization(E);

        Info.FFDiag(*ArraySize, diag::note_constexpr_new_too_small)
            << AllocBound.toString(10, /*Signed=*/false)
            << InitBound.toString(10, /*Signed=*/false)
            << (*ArraySize)->getSourceRange();
        return false;
      }

      // If the sizes differ, we must have an initializer list, and we need
      // special handling for this case when we initialize.
      if (InitBound != AllocBound)
        ResizedArrayILE = cast<InitListExpr>(Init);
-   } else if (Init) {
-     ResizedArrayCCE = cast<CXXConstructExpr>(Init);
    }

    AllocType = Info.Ctx.getConstantArrayType(AllocType, ArrayBound, nullptr,
                                              ArrayType::Normal, 0);
  } else {
    assert(!AllocType->isArrayType() &&
           "array allocation with non-array new");
  }

  APValue *Val;
  if (IsPlacement) {
    AccessKinds AK = AK_Construct;
    struct FindObjectHandler {
      EvalInfo &Info;
      const Expr *E;
      QualType AllocType;
      const AccessKinds AccessKind;
      APValue *Value;

      typedef bool result_type;
      bool failed() { return false; }
      bool found(APValue &Subobj, QualType SubobjType) {
        // FIXME: Reject the cases where [basic.life]p8 would not permit the
        // old name of the object to be used to name the new object.
if (!Info.Ctx.hasSameUnqualifiedType(SubobjType, AllocType)) { Info.FFDiag(E, diag::note_constexpr_placement_new_wrong_type) << SubobjType << AllocType; return false; } Value = &Subobj; return true; } bool found(APSInt &Value, QualType SubobjType) { Info.FFDiag(E, diag::note_constexpr_construct_complex_elem); return false; } bool found(APFloat &Value, QualType SubobjType) { Info.FFDiag(E, diag::note_constexpr_construct_complex_elem); return false; } } Handler = {Info, E, AllocType, AK, nullptr}; CompleteObject Obj = findCompleteObject(Info, E, AK, Result, AllocType); if (!Obj || !findSubobject(Info, E, Obj, Result.Designator, Handler)) return false; Val = Handler.Value; // [basic.life]p1: // The lifetime of an object o of type T ends when [...] the storage // which the object occupies is [...] reused by an object that is not // nested within o (6.6.2). *Val = APValue(); } else { // Perform the allocation and obtain a pointer to the resulting object. Val = Info.createHeapAlloc(E, AllocType, Result); if (!Val) return false; } - if (ResizedArrayILE) { + if (ValueInit) { + ImplicitValueInitExpr VIE(AllocType); + if (!EvaluateInPlace(*Val, Info, Result, &VIE)) + return false; + } else if (ResizedArrayILE) { if (!EvaluateArrayNewInitList(Info, Result, *Val, ResizedArrayILE, AllocType)) return false; } else if (ResizedArrayCCE) { if (!EvaluateArrayNewConstructExpr(Info, Result, *Val, ResizedArrayCCE, AllocType)) return false; } else if (Init) { if (!EvaluateInPlace(*Val, Info, Result, Init)) return false; } else if (!getDefaultInitValue(AllocType, *Val)) { return false; } // Array new returns a pointer to the first element, not a pointer to the // array. if (auto *AT = AllocType->getAsArrayTypeUnsafe()) Result.addArray(Info, E, cast(AT)); return true; } //===----------------------------------------------------------------------===// // Member Pointer Evaluation //===----------------------------------------------------------------------===// namespace { class MemberPointerExprEvaluator : public ExprEvaluatorBase { MemberPtr &Result; bool Success(const ValueDecl *D) { Result = MemberPtr(D); return true; } public: MemberPointerExprEvaluator(EvalInfo &Info, MemberPtr &Result) : ExprEvaluatorBaseTy(Info), Result(Result) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(V); return true; } bool ZeroInitialization(const Expr *E) { return Success((const ValueDecl*)nullptr); } bool VisitCastExpr(const CastExpr *E); bool VisitUnaryAddrOf(const UnaryOperator *E); }; } // end anonymous namespace static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isMemberPointerType()); return MemberPointerExprEvaluator(Info, Result).Visit(E); } bool MemberPointerExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_NullToMemberPointer: VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); case CK_BaseToDerivedMemberPointer: { if (!Visit(E->getSubExpr())) return false; if (E->path_empty()) return true; // Base-to-derived member pointer casts store the path in derived-to-base // order, so iterate backwards. The CXXBaseSpecifier also provides us with // the wrong end of the derived->base arc, so stagger the path by one class. 
typedef std::reverse_iterator ReverseIter; for (ReverseIter PathI(E->path_end() - 1), PathE(E->path_begin()); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "memptr cast through vbase"); const CXXRecordDecl *Derived = (*PathI)->getType()->getAsCXXRecordDecl(); if (!Result.castToDerived(Derived)) return Error(E); } const Type *FinalTy = E->getType()->castAs()->getClass(); if (!Result.castToDerived(FinalTy->getAsCXXRecordDecl())) return Error(E); return true; } case CK_DerivedToBaseMemberPointer: if (!Visit(E->getSubExpr())) return false; for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "memptr cast through vbase"); const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl(); if (!Result.castToBase(Base)) return Error(E); } return true; } } bool MemberPointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) { // C++11 [expr.unary.op]p3 has very strict rules on how the address of a // member can be formed. return Success(cast(E->getSubExpr())->getDecl()); } //===----------------------------------------------------------------------===// // Record Evaluation //===----------------------------------------------------------------------===// namespace { class RecordExprEvaluator : public ExprEvaluatorBase { const LValue &This; APValue &Result; public: RecordExprEvaluator(EvalInfo &info, const LValue &This, APValue &Result) : ExprEvaluatorBaseTy(info), This(This), Result(Result) {} bool Success(const APValue &V, const Expr *E) { Result = V; return true; } bool ZeroInitialization(const Expr *E) { return ZeroInitialization(E, E->getType()); } bool ZeroInitialization(const Expr *E, QualType T); bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } bool VisitCastExpr(const CastExpr *E); bool VisitInitListExpr(const InitListExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitCXXConstructExpr(E, E->getType()); } bool VisitLambdaExpr(const LambdaExpr *E); bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T); bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E); bool VisitBinCmp(const BinaryOperator *E); }; } /// Perform zero-initialization on an object of non-union class type. /// C++11 [dcl.init]p5: /// To zero-initialize an object or reference of type T means: /// [...] /// -- if T is a (possibly cv-qualified) non-union class type, /// each non-static data member and each base-class subobject is /// zero-initialized static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, const RecordDecl *RD, const LValue &This, APValue &Result) { assert(!RD->isUnion() && "Expected non-union class type"); const CXXRecordDecl *CD = dyn_cast(RD); Result = APValue(APValue::UninitStruct(), CD ? 
CD->getNumBases() : 0, std::distance(RD->field_begin(), RD->field_end())); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); if (CD) { unsigned Index = 0; for (CXXRecordDecl::base_class_const_iterator I = CD->bases_begin(), End = CD->bases_end(); I != End; ++I, ++Index) { const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl(); LValue Subobject = This; if (!HandleLValueDirectBase(Info, E, Subobject, CD, Base, &Layout)) return false; if (!HandleClassZeroInitialization(Info, E, Base, Subobject, Result.getStructBase(Index))) return false; } } for (const auto *I : RD->fields()) { // -- if T is a reference type, no initialization is performed. if (I->getType()->isReferenceType()) continue; LValue Subobject = This; if (!HandleLValueMember(Info, E, Subobject, I, &Layout)) return false; ImplicitValueInitExpr VIE(I->getType()); if (!EvaluateInPlace( Result.getStructField(I->getFieldIndex()), Info, Subobject, &VIE)) return false; } return true; } bool RecordExprEvaluator::ZeroInitialization(const Expr *E, QualType T) { const RecordDecl *RD = T->castAs()->getDecl(); if (RD->isInvalidDecl()) return false; if (RD->isUnion()) { // C++11 [dcl.init]p5: If T is a (possibly cv-qualified) union type, the // object's first non-static named data member is zero-initialized RecordDecl::field_iterator I = RD->field_begin(); if (I == RD->field_end()) { Result = APValue((const FieldDecl*)nullptr); return true; } LValue Subobject = This; if (!HandleLValueMember(Info, E, Subobject, *I)) return false; Result = APValue(*I); ImplicitValueInitExpr VIE(I->getType()); return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, &VIE); } if (isa(RD) && cast(RD)->getNumVBases()) { Info.FFDiag(E, diag::note_constexpr_virtual_base) << RD; return false; } return HandleClassZeroInitialization(Info, E, RD, This, Result); } bool RecordExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_ConstructorConversion: return Visit(E->getSubExpr()); case CK_DerivedToBase: case CK_UncheckedDerivedToBase: { APValue DerivedObject; if (!Evaluate(DerivedObject, Info, E->getSubExpr())) return false; if (!DerivedObject.isStruct()) return Error(E->getSubExpr()); // Derived-to-base rvalue conversion: just slice off the derived part. APValue *Value = &DerivedObject; const CXXRecordDecl *RD = E->getSubExpr()->getType()->getAsCXXRecordDecl(); for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "record rvalue with virtual base"); const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl(); Value = &Value->getStructBase(getBaseIndex(RD, Base)); RD = Base; } Result = *Value; return true; } } } bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { if (E->isTransparent()) return Visit(E->getInit(0)); const RecordDecl *RD = E->getType()->castAs()->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); auto *CXXRD = dyn_cast(RD); EvalInfo::EvaluatingConstructorRAII EvalObj( Info, ObjectUnderConstruction{This.getLValueBase(), This.Designator.Entries}, CXXRD && CXXRD->getNumBases()); if (RD->isUnion()) { const FieldDecl *Field = E->getInitializedFieldInUnion(); Result = APValue(Field); if (!Field) return true; // If the initializer list for a union does not contain any elements, the // first element of the union is value-initialized. 
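    // Illustrative example (editorial):
    //   union U { int a; float b; };
    //   constexpr U u = {};
    // makes 'a' the active member and value-initializes it to 0.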
// FIXME: The element should be initialized from an initializer list. // Is this difference ever observable for initializer lists which // we don't build? ImplicitValueInitExpr VIE(Field->getType()); const Expr *InitExpr = E->getNumInits() ? E->getInit(0) : &VIE; LValue Subobject = This; if (!HandleLValueMember(Info, InitExpr, Subobject, Field, &Layout)) return false; // Temporarily override This, in case there's a CXXDefaultInitExpr in here. ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This, isa(InitExpr)); return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, InitExpr); } if (!Result.hasValue()) Result = APValue(APValue::UninitStruct(), CXXRD ? CXXRD->getNumBases() : 0, std::distance(RD->field_begin(), RD->field_end())); unsigned ElementNo = 0; bool Success = true; // Initialize base classes. if (CXXRD && CXXRD->getNumBases()) { for (const auto &Base : CXXRD->bases()) { assert(ElementNo < E->getNumInits() && "missing init for base class"); const Expr *Init = E->getInit(ElementNo); LValue Subobject = This; if (!HandleLValueBase(Info, Init, Subobject, CXXRD, &Base)) return false; APValue &FieldVal = Result.getStructBase(ElementNo); if (!EvaluateInPlace(FieldVal, Info, Subobject, Init)) { if (!Info.noteFailure()) return false; Success = false; } ++ElementNo; } EvalObj.finishedConstructingBases(); } // Initialize members. for (const auto *Field : RD->fields()) { // Anonymous bit-fields are not considered members of the class for // purposes of aggregate initialization. if (Field->isUnnamedBitfield()) continue; LValue Subobject = This; bool HaveInit = ElementNo < E->getNumInits(); // FIXME: Diagnostics here should point to the end of the initializer // list, not the start. if (!HandleLValueMember(Info, HaveInit ? E->getInit(ElementNo) : E, Subobject, Field, &Layout)) return false; // Perform an implicit value-initialization for members beyond the end of // the initializer list. ImplicitValueInitExpr VIE(HaveInit ? Info.Ctx.IntTy : Field->getType()); const Expr *Init = HaveInit ? E->getInit(ElementNo++) : &VIE; // Temporarily override This, in case there's a CXXDefaultInitExpr in here. ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This, isa(Init)); APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); if (!EvaluateInPlace(FieldVal, Info, Subobject, Init) || (Field->isBitField() && !truncateBitfieldValue(Info, Init, FieldVal, Field))) { if (!Info.noteFailure()) return false; Success = false; } } EvalObj.finishedConstructingFields(); return Success; } bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T) { // Note that E's type is not necessarily the type of our class here; we might // be initializing an array element instead. const CXXConstructorDecl *FD = E->getConstructor(); if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false; bool ZeroInit = E->requiresZeroInitialization(); if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) { // If we've already performed zero-initialization, we're already done. if (Result.hasValue()) return true; if (ZeroInit) return ZeroInitialization(E, T); return getDefaultInitValue(T, Result); } const FunctionDecl *Definition = nullptr; auto Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body)) return false; // Avoid materializing a temporary for an elidable copy/move constructor. 
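  // Illustrative sketch (not from the original source): for an elidable copy
  // such as
  //
  //   struct S { int x; constexpr S(int x) : x(x) {} };
  //   constexpr S s = S(S(1));
  //
  // the evaluator can visit the materialized temporary's initializer directly
  // instead of evaluating the copy/move constructor call.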
if (E->isElidable() && !ZeroInit) if (const MaterializeTemporaryExpr *ME = dyn_cast(E->getArg(0))) return Visit(ME->getSubExpr()); if (ZeroInit && !ZeroInitialization(E, T)) return false; auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); return HandleConstructorCall(E, This, Args, cast(Definition), Info, Result); } bool RecordExprEvaluator::VisitCXXInheritedCtorInitExpr( const CXXInheritedCtorInitExpr *E) { if (!Info.CurrentCall) { assert(Info.checkingPotentialConstantExpression()); return false; } const CXXConstructorDecl *FD = E->getConstructor(); if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false; const FunctionDecl *Definition = nullptr; auto Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body)) return false; return HandleConstructorCall(E, This, Info.CurrentCall->Arguments, cast(Definition), Info, Result); } bool RecordExprEvaluator::VisitCXXStdInitializerListExpr( const CXXStdInitializerListExpr *E) { const ConstantArrayType *ArrayType = Info.Ctx.getAsConstantArrayType(E->getSubExpr()->getType()); LValue Array; if (!EvaluateLValue(E->getSubExpr(), Array, Info)) return false; // Get a pointer to the first element of the array. Array.addArray(Info, E, ArrayType); auto InvalidType = [&] { Info.FFDiag(E, diag::note_constexpr_unsupported_layout) << E->getType(); return false; }; // FIXME: Perform the checks on the field types in SemaInit. RecordDecl *Record = E->getType()->castAs()->getDecl(); RecordDecl::field_iterator Field = Record->field_begin(); if (Field == Record->field_end()) return InvalidType(); // Start pointer. if (!Field->getType()->isPointerType() || !Info.Ctx.hasSameType(Field->getType()->getPointeeType(), ArrayType->getElementType())) return InvalidType(); // FIXME: What if the initializer_list type has base classes, etc? Result = APValue(APValue::UninitStruct(), 0, 2); Array.moveInto(Result.getStructField(0)); if (++Field == Record->field_end()) return InvalidType(); if (Field->getType()->isPointerType() && Info.Ctx.hasSameType(Field->getType()->getPointeeType(), ArrayType->getElementType())) { // End pointer. if (!HandleLValueArrayAdjustment(Info, E, Array, ArrayType->getElementType(), ArrayType->getSize().getZExtValue())) return false; Array.moveInto(Result.getStructField(1)); } else if (Info.Ctx.hasSameType(Field->getType(), Info.Ctx.getSizeType())) // Length. Result.getStructField(1) = APValue(APSInt(ArrayType->getSize())); else return InvalidType(); if (++Field != Record->field_end()) return InvalidType(); return true; } bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { const CXXRecordDecl *ClosureClass = E->getLambdaClass(); if (ClosureClass->isInvalidDecl()) return false; const size_t NumFields = std::distance(ClosureClass->field_begin(), ClosureClass->field_end()); assert(NumFields == (size_t)std::distance(E->capture_init_begin(), E->capture_init_end()) && "The number of lambda capture initializers should equal the number of " "fields within the closure type"); Result = APValue(APValue::UninitStruct(), /*NumBases*/0, NumFields); // Iterate through all the lambda's closure object's fields and initialize // them. 
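  // Illustrative sketch (not from the original source): for a lambda such as
  //
  //   constexpr int n = 1;
  //   auto f = [n, m = n + 1] { return n + m; };
  //
  // the closure type has one field per capture, and each field is initialized
  // below from the corresponding capture initializer (here: n and n + 1).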
auto *CaptureInitIt = E->capture_init_begin(); const LambdaCapture *CaptureIt = ClosureClass->captures_begin(); bool Success = true; for (const auto *Field : ClosureClass->fields()) { assert(CaptureInitIt != E->capture_init_end()); // Get the initializer for this field Expr *const CurFieldInit = *CaptureInitIt++; // If there is no initializer, either this is a VLA or an error has // occurred. if (!CurFieldInit) return Error(E); APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) { if (!Info.keepEvaluatingAfterFailure()) return false; Success = false; } ++CaptureIt; } return Success; } static bool EvaluateRecord(const Expr *E, const LValue &This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRecordType() && "can't evaluate expression as a record rvalue"); return RecordExprEvaluator(Info, This, Result).Visit(E); } //===----------------------------------------------------------------------===// // Temporary Evaluation // // Temporaries are represented in the AST as rvalues, but generally behave like // lvalues. The full-object of which the temporary is a subobject is implicitly // materialized so that a reference can bind to it. //===----------------------------------------------------------------------===// namespace { class TemporaryExprEvaluator : public LValueExprEvaluatorBase { public: TemporaryExprEvaluator(EvalInfo &Info, LValue &Result) : LValueExprEvaluatorBaseTy(Info, Result, false) {} /// Visit an expression which constructs the value of this temporary. bool VisitConstructExpr(const Expr *E) { APValue &Value = Info.CurrentCall->createTemporary(E, E->getType(), false, Result); return EvaluateInPlace(Value, Info, Result, E); } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return LValueExprEvaluatorBaseTy::VisitCastExpr(E); case CK_ConstructorConversion: return VisitConstructExpr(E->getSubExpr()); } } bool VisitInitListExpr(const InitListExpr *E) { return VisitConstructExpr(E); } bool VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitConstructExpr(E); } bool VisitCallExpr(const CallExpr *E) { return VisitConstructExpr(E); } bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) { return VisitConstructExpr(E); } bool VisitLambdaExpr(const LambdaExpr *E) { return VisitConstructExpr(E); } }; } // end anonymous namespace /// Evaluate an expression of record type as a temporary. static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRecordType()); return TemporaryExprEvaluator(Info, Result).Visit(E); } //===----------------------------------------------------------------------===// // Vector Evaluation //===----------------------------------------------------------------------===// namespace { class VectorExprEvaluator : public ExprEvaluatorBase { APValue &Result; public: VectorExprEvaluator(EvalInfo &info, APValue &Result) : ExprEvaluatorBaseTy(info), Result(Result) {} bool Success(ArrayRef V, const Expr *E) { assert(V.size() == E->getType()->castAs()->getNumElements()); // FIXME: remove this APValue copy. 
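    // Illustrative sketch (not from the original source): an initializer such
    // as
    //
    //   typedef int v4i __attribute__((vector_size(16)));
    //   constexpr v4i a = {1, 2, 3, 4};
    //
    // reaches this point with V holding one APValue per lane, which is copied
    // into a single vector-valued APValue.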
Result = APValue(V.data(), V.size()); return true; } bool Success(const APValue &V, const Expr *E) { assert(V.isVector()); Result = V; return true; } bool ZeroInitialization(const Expr *E); bool VisitUnaryReal(const UnaryOperator *E) { return Visit(E->getSubExpr()); } bool VisitCastExpr(const CastExpr* E); bool VisitInitListExpr(const InitListExpr *E); bool VisitUnaryImag(const UnaryOperator *E); bool VisitBinaryOperator(const BinaryOperator *E); // FIXME: Missing: unary -, unary ~, conditional operator (for GNU // conditional select), shufflevector, ExtVectorElementExpr }; } // end anonymous namespace static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isVectorType() &&"not a vector rvalue"); return VectorExprEvaluator(Info, Result).Visit(E); } bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { const VectorType *VTy = E->getType()->castAs(); unsigned NElts = VTy->getNumElements(); const Expr *SE = E->getSubExpr(); QualType SETy = SE->getType(); switch (E->getCastKind()) { case CK_VectorSplat: { APValue Val = APValue(); if (SETy->isIntegerType()) { APSInt IntResult; if (!EvaluateInteger(SE, IntResult, Info)) return false; Val = APValue(std::move(IntResult)); } else if (SETy->isRealFloatingType()) { APFloat FloatResult(0.0); if (!EvaluateFloat(SE, FloatResult, Info)) return false; Val = APValue(std::move(FloatResult)); } else { return Error(E); } // Splat and create vector APValue. SmallVector Elts(NElts, Val); return Success(Elts, E); } case CK_BitCast: { // Evaluate the operand into an APInt we can extract from. llvm::APInt SValInt; if (!EvalAndBitcastToAPInt(Info, SE, SValInt)) return false; // Extract the elements QualType EltTy = VTy->getElementType(); unsigned EltSize = Info.Ctx.getTypeSize(EltTy); bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); SmallVector Elts; if (EltTy->isRealFloatingType()) { const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(EltTy); unsigned FloatEltSize = EltSize; if (&Sem == &APFloat::x87DoubleExtended()) FloatEltSize = 80; for (unsigned i = 0; i < NElts; i++) { llvm::APInt Elt; if (BigEndian) Elt = SValInt.rotl(i*EltSize+FloatEltSize).trunc(FloatEltSize); else Elt = SValInt.rotr(i*EltSize).trunc(FloatEltSize); Elts.push_back(APValue(APFloat(Sem, Elt))); } } else if (EltTy->isIntegerType()) { for (unsigned i = 0; i < NElts; i++) { llvm::APInt Elt; if (BigEndian) Elt = SValInt.rotl(i*EltSize+EltSize).zextOrTrunc(EltSize); else Elt = SValInt.rotr(i*EltSize).zextOrTrunc(EltSize); Elts.push_back(APValue(APSInt(Elt, EltTy->isSignedIntegerType()))); } } else { return Error(E); } return Success(Elts, E); } default: return ExprEvaluatorBaseTy::VisitCastExpr(E); } } bool VectorExprEvaluator::VisitInitListExpr(const InitListExpr *E) { const VectorType *VT = E->getType()->castAs(); unsigned NumInits = E->getNumInits(); unsigned NumElements = VT->getNumElements(); QualType EltTy = VT->getElementType(); SmallVector Elements; // The number of initializers can be less than the number of // vector elements. For OpenCL, this can be due to nested vector // initialization. For GCC compatibility, missing trailing elements // should be initialized with zeroes. unsigned CountInits = 0, CountElts = 0; while (CountElts < NumElements) { // Handle nested vector initialization. 
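    // Illustrative sketch (not from the original source): given float a, b,
    // c, d, OpenCL allows a nested vector to supply several elements at once,
    // e.g.
    //
    //   float4 v = (float4)((float2)(a, b), c, d);
    //
    // where the nested float2 contributes two elements here; for GCC
    // compatibility, any missing trailing elements are zero-filled instead.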
if (CountInits < NumInits && E->getInit(CountInits)->getType()->isVectorType()) { APValue v; if (!EvaluateVector(E->getInit(CountInits), v, Info)) return Error(E); unsigned vlen = v.getVectorLength(); for (unsigned j = 0; j < vlen; j++) Elements.push_back(v.getVectorElt(j)); CountElts += vlen; } else if (EltTy->isIntegerType()) { llvm::APSInt sInt(32); if (CountInits < NumInits) { if (!EvaluateInteger(E->getInit(CountInits), sInt, Info)) return false; } else // trailing integer zero. sInt = Info.Ctx.MakeIntValue(0, EltTy); Elements.push_back(APValue(sInt)); CountElts++; } else { llvm::APFloat f(0.0); if (CountInits < NumInits) { if (!EvaluateFloat(E->getInit(CountInits), f, Info)) return false; } else // trailing float zero. f = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy)); Elements.push_back(APValue(f)); CountElts++; } CountInits++; } return Success(Elements, E); } bool VectorExprEvaluator::ZeroInitialization(const Expr *E) { const auto *VT = E->getType()->castAs(); QualType EltTy = VT->getElementType(); APValue ZeroElement; if (EltTy->isIntegerType()) ZeroElement = APValue(Info.Ctx.MakeIntValue(0, EltTy)); else ZeroElement = APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy))); SmallVector Elements(VT->getNumElements(), ZeroElement); return Success(Elements, E); } bool VectorExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); } bool VectorExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { BinaryOperatorKind Op = E->getOpcode(); assert(Op != BO_PtrMemD && Op != BO_PtrMemI && Op != BO_Cmp && "Operation not supported on vector types"); if (Op == BO_Comma) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); Expr *LHS = E->getLHS(); Expr *RHS = E->getRHS(); assert(LHS->getType()->isVectorType() && RHS->getType()->isVectorType() && "Must both be vector types"); // Checking JUST the types are the same would be fine, except shifts don't // need to have their types be the same (since you always shift by an int). assert(LHS->getType()->getAs()->getNumElements() == E->getType()->getAs()->getNumElements() && RHS->getType()->getAs()->getNumElements() == E->getType()->getAs()->getNumElements() && "All operands must be the same size."); APValue LHSValue; APValue RHSValue; bool LHSOK = Evaluate(LHSValue, Info, LHS); if (!LHSOK && !Info.noteFailure()) return false; if (!Evaluate(RHSValue, Info, RHS) || !LHSOK) return false; if (!handleVectorVectorBinOp(Info, E, Op, LHSValue, RHSValue)) return false; return Success(LHSValue, E); } //===----------------------------------------------------------------------===// // Array Evaluation //===----------------------------------------------------------------------===// namespace { class ArrayExprEvaluator : public ExprEvaluatorBase { const LValue &This; APValue &Result; public: ArrayExprEvaluator(EvalInfo &Info, const LValue &This, APValue &Result) : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {} bool Success(const APValue &V, const Expr *E) { assert(V.isArray() && "expected array"); Result = V; return true; } bool ZeroInitialization(const Expr *E) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType()); if (!CAT) { if (E->getType()->isIncompleteArrayType()) { // We can be asked to zero-initialize a flexible array member; this // is represented as an ImplicitValueInitExpr of incomplete array // type. In this case, the array has zero elements. 
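        // Illustrative sketch (not from the original source): for
        //
        //   struct S { int n; int tail[]; };  // flexible array member
        //
        // zero-initializing an S is represented with an ImplicitValueInitExpr
        // of the incomplete array type, and the resulting array value simply
        // has no elements.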
Result = APValue(APValue::UninitArray(), 0, 0); return true; } // FIXME: We could handle VLAs here. return Error(E); } Result = APValue(APValue::UninitArray(), 0, CAT->getSize().getZExtValue()); if (!Result.hasArrayFiller()) return true; // Zero-initialize all elements. LValue Subobject = This; Subobject.addArray(Info, E, CAT); ImplicitValueInitExpr VIE(CAT->getElementType()); return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE); } bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } bool VisitInitListExpr(const InitListExpr *E, QualType AllocType = QualType()); bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, const LValue &Subobject, APValue *Value, QualType Type); bool VisitStringLiteral(const StringLiteral *E, QualType AllocType = QualType()) { expandStringLiteral(Info, E, Result, AllocType); return true; } }; } // end anonymous namespace static bool EvaluateArray(const Expr *E, const LValue &This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isArrayType() && "not an array rvalue"); return ArrayExprEvaluator(Info, This, Result).Visit(E); } static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, APValue &Result, const InitListExpr *ILE, QualType AllocType) { assert(ILE->isRValue() && ILE->getType()->isArrayType() && "not an array rvalue"); return ArrayExprEvaluator(Info, This, Result) .VisitInitListExpr(ILE, AllocType); } static bool EvaluateArrayNewConstructExpr(EvalInfo &Info, LValue &This, APValue &Result, const CXXConstructExpr *CCE, QualType AllocType) { assert(CCE->isRValue() && CCE->getType()->isArrayType() && "not an array rvalue"); return ArrayExprEvaluator(Info, This, Result) .VisitCXXConstructExpr(CCE, This, &Result, AllocType); } // Return true iff the given array filler may depend on the element index. static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) { // For now, just allow non-class value-initialization and initialization // lists comprised of them. if (isa(FillerExpr)) return false; if (const InitListExpr *ILE = dyn_cast(FillerExpr)) { for (unsigned I = 0, E = ILE->getNumInits(); I != E; ++I) { if (MaybeElementDependentArrayFiller(ILE->getInit(I))) return true; } return false; } return true; } bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E, QualType AllocType) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType( AllocType.isNull() ? E->getType() : AllocType); if (!CAT) return Error(E); // C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...] // an appropriately-typed string literal enclosed in braces. if (E->isStringLiteralInit()) { auto *SL = dyn_cast(E->getInit(0)->IgnoreParens()); // FIXME: Support ObjCEncodeExpr here once we support it in // ArrayExprEvaluator generally. if (!SL) return Error(E); return VisitStringLiteral(SL, AllocType); } bool Success = true; assert((!Result.isArray() || Result.getArrayInitializedElts() == 0) && "zero-initialized array shouldn't have any initialized elts"); APValue Filler; if (Result.isArray() && Result.hasArrayFiller()) Filler = Result.getArrayFiller(); unsigned NumEltsToInit = E->getNumInits(); unsigned NumElts = CAT->getSize().getZExtValue(); const Expr *FillerExpr = E->hasArrayFiller() ? E->getArrayFiller() : nullptr; // If the initializer might depend on the array index, run it for each // array element. 
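  // Illustrative sketch (not from the original source): for
  //
  //   constexpr int xs[100] = {1, 2};
  //
  // only the first two elements have explicit initializers; the remaining 98
  // share a trivial value-initialization filler, which is evaluated once and
  // reused rather than re-evaluated per element.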
if (NumEltsToInit != NumElts && MaybeElementDependentArrayFiller(FillerExpr)) NumEltsToInit = NumElts; LLVM_DEBUG(llvm::dbgs() << "The number of elements to initialize: " << NumEltsToInit << ".\n"); Result = APValue(APValue::UninitArray(), NumEltsToInit, NumElts); // If the array was previously zero-initialized, preserve the // zero-initialized values. if (Filler.hasValue()) { for (unsigned I = 0, E = Result.getArrayInitializedElts(); I != E; ++I) Result.getArrayInitializedElt(I) = Filler; if (Result.hasArrayFiller()) Result.getArrayFiller() = Filler; } LValue Subobject = This; Subobject.addArray(Info, E, CAT); for (unsigned Index = 0; Index != NumEltsToInit; ++Index) { const Expr *Init = Index < E->getNumInits() ? E->getInit(Index) : FillerExpr; if (!EvaluateInPlace(Result.getArrayInitializedElt(Index), Info, Subobject, Init) || !HandleLValueArrayAdjustment(Info, Init, Subobject, CAT->getElementType(), 1)) { if (!Info.noteFailure()) return false; Success = false; } } if (!Result.hasArrayFiller()) return Success; // If we get here, we have a trivial filler, which we can just evaluate // once and splat over the rest of the array elements. assert(FillerExpr && "no array filler for incomplete init list"); return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, FillerExpr) && Success; } bool ArrayExprEvaluator::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) { LValue CommonLV; if (E->getCommonExpr() && !Evaluate(Info.CurrentCall->createTemporary( E->getCommonExpr(), getStorageType(Info.Ctx, E->getCommonExpr()), false, CommonLV), Info, E->getCommonExpr()->getSourceExpr())) return false; auto *CAT = cast(E->getType()->castAsArrayTypeUnsafe()); uint64_t Elements = CAT->getSize().getZExtValue(); Result = APValue(APValue::UninitArray(), Elements, Elements); LValue Subobject = This; Subobject.addArray(Info, E, CAT); bool Success = true; for (EvalInfo::ArrayInitLoopIndex Index(Info); Index != Elements; ++Index) { if (!EvaluateInPlace(Result.getArrayInitializedElt(Index), Info, Subobject, E->getSubExpr()) || !HandleLValueArrayAdjustment(Info, E, Subobject, CAT->getElementType(), 1)) { if (!Info.noteFailure()) return false; Success = false; } } return Success; } bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitCXXConstructExpr(E, This, &Result, E->getType()); } bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, const LValue &Subobject, APValue *Value, QualType Type) { bool HadZeroInit = Value->hasValue(); if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(Type)) { unsigned N = CAT->getSize().getZExtValue(); // Preserve the array filler if we had prior zero-initialization. APValue Filler = HadZeroInit && Value->hasArrayFiller() ? Value->getArrayFiller() : APValue(); *Value = APValue(APValue::UninitArray(), N, N); if (HadZeroInit) for (unsigned I = 0; I != N; ++I) Value->getArrayInitializedElt(I) = Filler; // Initialize the elements. 
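    // Illustrative sketch (not from the original source): for
    //
    //   struct P { int v; constexpr P() : v(7) {} };
    //   constexpr int f() { P ps[3]; return ps[2].v; }
    //
    // the declaration of ps constructs each of the three elements in turn,
    // advancing the designator to the next element after each construction.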
LValue ArrayElt = Subobject; ArrayElt.addArray(Info, E, CAT); for (unsigned I = 0; I != N; ++I) if (!VisitCXXConstructExpr(E, ArrayElt, &Value->getArrayInitializedElt(I), CAT->getElementType()) || !HandleLValueArrayAdjustment(Info, E, ArrayElt, CAT->getElementType(), 1)) return false; return true; } if (!Type->isRecordType()) return Error(E); return RecordExprEvaluator(Info, Subobject, *Value) .VisitCXXConstructExpr(E, Type); } //===----------------------------------------------------------------------===// // Integer Evaluation // // As a GNU extension, we support casting pointers to sufficiently-wide integer // types and back in constant folding. Integer values are thus represented // either as an integer-valued APValue, or as an lvalue-valued APValue. //===----------------------------------------------------------------------===// namespace { class IntExprEvaluator : public ExprEvaluatorBase { APValue &Result; public: IntExprEvaluator(EvalInfo &info, APValue &result) : ExprEvaluatorBaseTy(info), Result(result) {} bool Success(const llvm::APSInt &SI, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); assert(SI.isSigned() == E->getType()->isSignedIntegerOrEnumerationType() && "Invalid evaluation result."); assert(SI.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); Result = APValue(SI); return true; } bool Success(const llvm::APSInt &SI, const Expr *E) { return Success(SI, E, Result); } bool Success(const llvm::APInt &I, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); assert(I.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); Result = APValue(APSInt(I)); Result.getInt().setIsUnsigned( E->getType()->isUnsignedIntegerOrEnumerationType()); return true; } bool Success(const llvm::APInt &I, const Expr *E) { return Success(I, E, Result); } bool Success(uint64_t Value, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); Result = APValue(Info.Ctx.MakeIntValue(Value, E->getType())); return true; } bool Success(uint64_t Value, const Expr *E) { return Success(Value, E, Result); } bool Success(CharUnits Size, const Expr *E) { return Success(Size.getQuantity(), E); } bool Success(const APValue &V, const Expr *E) { if (V.isLValue() || V.isAddrLabelDiff() || V.isIndeterminate()) { Result = V; return true; } return Success(V.getInt(), E); } bool ZeroInitialization(const Expr *E) { return Success(0, E); } //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// bool VisitIntegerLiteral(const IntegerLiteral *E) { return Success(E->getValue(), E); } bool VisitCharacterLiteral(const CharacterLiteral *E) { return Success(E->getValue(), E); } bool CheckReferencedDecl(const Expr *E, const Decl *D); bool VisitDeclRefExpr(const DeclRefExpr *E) { if (CheckReferencedDecl(E, E->getDecl())) return true; return ExprEvaluatorBaseTy::VisitDeclRefExpr(E); } bool VisitMemberExpr(const MemberExpr *E) { if (CheckReferencedDecl(E, E->getMemberDecl())) { VisitIgnoredBaseExpression(E->getBase()); return true; } return ExprEvaluatorBaseTy::VisitMemberExpr(E); } bool VisitCallExpr(const CallExpr *E); bool VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp); bool VisitBinaryOperator(const BinaryOperator *E); bool 
VisitOffsetOfExpr(const OffsetOfExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitCastExpr(const CastExpr* E); bool VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E); bool VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) { return Success(E->getValue(), E); } bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E) { return Success(E->getValue(), E); } bool VisitArrayInitIndexExpr(const ArrayInitIndexExpr *E) { if (Info.ArrayInitIndex == uint64_t(-1)) { // We were asked to evaluate this subexpression independent of the // enclosing ArrayInitLoopExpr. We can't do that. Info.FFDiag(E); return false; } return Success(Info.ArrayInitIndex, E); } // Note, GNU defines __null as an integer, not a pointer. bool VisitGNUNullExpr(const GNUNullExpr *E) { return ZeroInitialization(E); } bool VisitTypeTraitExpr(const TypeTraitExpr *E) { return Success(E->getValue(), E); } bool VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { return Success(E->getValue(), E); } bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E) { return Success(E->getValue(), E); } bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E); bool VisitSizeOfPackExpr(const SizeOfPackExpr *E); bool VisitSourceLocExpr(const SourceLocExpr *E); bool VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E); bool VisitRequiresExpr(const RequiresExpr *E); // FIXME: Missing: array subscript of vector, member of vector }; class FixedPointExprEvaluator : public ExprEvaluatorBase { APValue &Result; public: FixedPointExprEvaluator(EvalInfo &info, APValue &result) : ExprEvaluatorBaseTy(info), Result(result) {} bool Success(const llvm::APInt &I, const Expr *E) { return Success( APFixedPoint(I, Info.Ctx.getFixedPointSemantics(E->getType())), E); } bool Success(uint64_t Value, const Expr *E) { return Success( APFixedPoint(Value, Info.Ctx.getFixedPointSemantics(E->getType())), E); } bool Success(const APValue &V, const Expr *E) { return Success(V.getFixedPoint(), E); } bool Success(const APFixedPoint &V, const Expr *E) { assert(E->getType()->isFixedPointType() && "Invalid evaluation result."); assert(V.getWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); Result = APValue(V); return true; } //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// bool VisitFixedPointLiteral(const FixedPointLiteral *E) { return Success(E->getValue(), E); } bool VisitCastExpr(const CastExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitBinaryOperator(const BinaryOperator *E); }; } // end anonymous namespace /// EvaluateIntegerOrLValue - Evaluate an rvalue integral-typed expression, and /// produce either the integer value or a pointer. /// /// GCC has a heinous extension which folds casts between pointer types and /// pointer-sized integral types. We support this by allowing the evaluation of /// an integer rvalue to produce a pointer (represented as an lvalue) instead. /// Some simple arithmetic on such values is supported (they are treated much /// like char*). 
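/// (Illustrative sketch, not from the original source: this lets us fold code
/// such as
///
///   extern int g;
///   __INTPTR_TYPE__ p = (__INTPTR_TYPE__)&g + 4;
///
/// where the result is represented as an lvalue based on &g with an offset of
/// 4 bytes, rather than as a plain integer.)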
static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isIntegralOrEnumerationType()); return IntExprEvaluator(Info, Result).Visit(E); } static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info) { APValue Val; if (!EvaluateIntegerOrLValue(E, Val, Info)) return false; if (!Val.isInt()) { // FIXME: It would be better to produce the diagnostic for casting // a pointer to an integer. Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } Result = Val.getInt(); return true; } bool IntExprEvaluator::VisitSourceLocExpr(const SourceLocExpr *E) { APValue Evaluated = E->EvaluateInContext( Info.Ctx, Info.CurrentCall->CurSourceLocExprScope.getDefaultExpr()); return Success(Evaluated, E); } static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result, EvalInfo &Info) { if (E->getType()->isFixedPointType()) { APValue Val; if (!FixedPointExprEvaluator(Info, Val).Visit(E)) return false; if (!Val.isFixedPoint()) return false; Result = Val.getFixedPoint(); return true; } return false; } static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, EvalInfo &Info) { if (E->getType()->isIntegerType()) { auto FXSema = Info.Ctx.getFixedPointSemantics(E->getType()); APSInt Val; if (!EvaluateInteger(E, Val, Info)) return false; Result = APFixedPoint(Val, FXSema); return true; } else if (E->getType()->isFixedPointType()) { return EvaluateFixedPoint(E, Result, Info); } return false; } /// Check whether the given declaration can be directly converted to an integral /// rvalue. If not, no diagnostic is produced; there are other things we can /// try. bool IntExprEvaluator::CheckReferencedDecl(const Expr* E, const Decl* D) { // Enums are integer constant exprs. if (const EnumConstantDecl *ECD = dyn_cast(D)) { // Check for signedness/width mismatches between E type and ECD value. bool SameSign = (ECD->getInitVal().isSigned() == E->getType()->isSignedIntegerOrEnumerationType()); bool SameWidth = (ECD->getInitVal().getBitWidth() == Info.Ctx.getIntWidth(E->getType())); if (SameSign && SameWidth) return Success(ECD->getInitVal(), E); else { // Get rid of mismatch (otherwise Success assertions will fail) // by computing a new value matching the type of E. llvm::APSInt Val = ECD->getInitVal(); if (!SameSign) Val.setIsSigned(!ECD->getInitVal().isSigned()); if (!SameWidth) Val = Val.extOrTrunc(Info.Ctx.getIntWidth(E->getType())); return Success(Val, E); } } return false; } /// Values returned by __builtin_classify_type, chosen to match the values /// produced by GCC's builtin. enum class GCCTypeClass { None = -1, Void = 0, Integer = 1, // GCC reserves 2 for character types, but instead classifies them as // integers. Enum = 3, Bool = 4, Pointer = 5, // GCC reserves 6 for references, but appears to never use it (because // expressions never have reference type, presumably). PointerToDataMember = 7, RealFloat = 8, Complex = 9, // GCC reserves 10 for functions, but does not use it since GCC version 6 due // to decay to pointer. (Prior to version 6 it was only used in C++ mode). // GCC claims to reserve 11 for pointers to member functions, but *actually* // uses 12 for that purpose, same as for a class or struct. Maybe it // internally implements a pointer to member as a struct? Who knows. PointerToMemberFunction = 12, // Not a bug, see above. ClassOrStruct = 12, Union = 13, // GCC reserves 14 for arrays, but does not use it since GCC version 6 due to // decay to pointer. 
(Prior to version 6 it was only used in C++ mode). // GCC reserves 15 for strings, but actually uses 5 (pointer) for string // literals. }; /// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way /// as GCC. static GCCTypeClass EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { assert(!T->isDependentType() && "unexpected dependent type"); QualType CanTy = T.getCanonicalType(); const BuiltinType *BT = dyn_cast(CanTy); switch (CanTy->getTypeClass()) { #define TYPE(ID, BASE) #define DEPENDENT_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID: #include "clang/AST/TypeNodes.inc" case Type::Auto: case Type::DeducedTemplateSpecialization: llvm_unreachable("unexpected non-canonical or dependent type"); case Type::Builtin: switch (BT->getKind()) { #define BUILTIN_TYPE(ID, SINGLETON_ID) #define SIGNED_TYPE(ID, SINGLETON_ID) \ case BuiltinType::ID: return GCCTypeClass::Integer; #define FLOATING_TYPE(ID, SINGLETON_ID) \ case BuiltinType::ID: return GCCTypeClass::RealFloat; #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) \ case BuiltinType::ID: break; #include "clang/AST/BuiltinTypes.def" case BuiltinType::Void: return GCCTypeClass::Void; case BuiltinType::Bool: return GCCTypeClass::Bool; case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::WChar_U: case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::UShort: case BuiltinType::UInt: case BuiltinType::ULong: case BuiltinType::ULongLong: case BuiltinType::UInt128: return GCCTypeClass::Integer; case BuiltinType::UShortAccum: case BuiltinType::UAccum: case BuiltinType::ULongAccum: case BuiltinType::UShortFract: case BuiltinType::UFract: case BuiltinType::ULongFract: case BuiltinType::SatUShortAccum: case BuiltinType::SatUAccum: case BuiltinType::SatULongAccum: case BuiltinType::SatUShortFract: case BuiltinType::SatUFract: case BuiltinType::SatULongFract: return GCCTypeClass::None; case BuiltinType::NullPtr: case BuiltinType::ObjCId: case BuiltinType::ObjCClass: case BuiltinType::ObjCSel: #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ case BuiltinType::Id: #include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: #define SVE_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: #include "clang/Basic/AArch64SVEACLETypes.def" return GCCTypeClass::None; case BuiltinType::Dependent: llvm_unreachable("unexpected dependent type"); }; llvm_unreachable("unexpected placeholder type"); case Type::Enum: return LangOpts.CPlusPlus ? GCCTypeClass::Enum : GCCTypeClass::Integer; case Type::Pointer: case Type::ConstantArray: case Type::VariableArray: case Type::IncompleteArray: case Type::FunctionNoProto: case Type::FunctionProto: return GCCTypeClass::Pointer; case Type::MemberPointer: return CanTy->isMemberDataPointerType() ? GCCTypeClass::PointerToDataMember : GCCTypeClass::PointerToMemberFunction; case Type::Complex: return GCCTypeClass::Complex; case Type::Record: return CanTy->isUnionType() ? GCCTypeClass::Union : GCCTypeClass::ClassOrStruct; case Type::Atomic: // GCC classifies _Atomic T the same as T. 
return EvaluateBuiltinClassifyType( CanTy->castAs()->getValueType(), LangOpts); case Type::BlockPointer: case Type::Vector: case Type::ExtVector: case Type::ConstantMatrix: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: case Type::ExtInt: // GCC classifies vectors as None. We follow its lead and classify all // other types that don't fit into the regular classification the same way. return GCCTypeClass::None; case Type::LValueReference: case Type::RValueReference: llvm_unreachable("invalid type for expression"); } llvm_unreachable("unexpected type class"); } /// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way /// as GCC. static GCCTypeClass EvaluateBuiltinClassifyType(const CallExpr *E, const LangOptions &LangOpts) { // If no argument was supplied, default to None. This isn't // ideal, however it is what gcc does. if (E->getNumArgs() == 0) return GCCTypeClass::None; // FIXME: Bizarrely, GCC treats a call with more than one argument as not // being an ICE, but still folds it to a constant using the type of the first // argument. return EvaluateBuiltinClassifyType(E->getArg(0)->getType(), LangOpts); } /// EvaluateBuiltinConstantPForLValue - Determine the result of /// __builtin_constant_p when applied to the given pointer. /// /// A pointer is only "constant" if it is null (or a pointer cast to integer) /// or it points to the first character of a string literal. static bool EvaluateBuiltinConstantPForLValue(const APValue &LV) { APValue::LValueBase Base = LV.getLValueBase(); if (Base.isNull()) { // A null base is acceptable. return true; } else if (const Expr *E = Base.dyn_cast()) { if (!isa(E)) return false; return LV.getLValueOffset().isZero(); } else if (Base.is()) { // Surprisingly, GCC considers __builtin_constant_p(&typeid(int)) to // evaluate to true. return true; } else { // Any other base is not constant enough for GCC. return false; } } /// EvaluateBuiltinConstantP - Evaluate __builtin_constant_p as similarly to /// GCC as we can manage. static bool EvaluateBuiltinConstantP(EvalInfo &Info, const Expr *Arg) { // This evaluation is not permitted to have side-effects, so evaluate it in // a speculative evaluation context. SpeculativeEvaluationRAII SpeculativeEval(Info); // Constant-folding is always enabled for the operand of __builtin_constant_p // (even when the enclosing evaluation context otherwise requires a strict // language-specific constant expression). FoldConstant Fold(Info, true); QualType ArgType = Arg->getType(); // __builtin_constant_p always has one operand. The rules which gcc follows // are not precisely documented, but are as follows: // // - If the operand is of integral, floating, complex or enumeration type, // and can be folded to a known value of that type, it returns 1. // - If the operand can be folded to a pointer to the first character // of a string literal (or such a pointer cast to an integral type) // or to a null pointer or an integer cast to a pointer, it returns 1. // // Otherwise, it returns 0. // // FIXME: GCC also intends to return 1 for literals of aggregate types, but // its support for this did not work prior to GCC 9 and is not yet well // understood. 
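  // Illustrative sketch (not from the original source): under these rules,
  //
  //   __builtin_constant_p(42)            // 1: foldable integer
  //   __builtin_constant_p("abc")         // 1: pointer to a string literal
  //   __builtin_constant_p((char *)0)     // 1: null pointer
  //   __builtin_constant_p(&some_global)  // 0: not a string literal or null
  //
  // where some_global is a hypothetical object with static storage duration.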
if (ArgType->isIntegralOrEnumerationType() || ArgType->isFloatingType() || ArgType->isAnyComplexType() || ArgType->isPointerType() || ArgType->isNullPtrType()) { APValue V; if (!::EvaluateAsRValue(Info, Arg, V) || Info.EvalStatus.HasSideEffects) { Fold.keepDiagnostics(); return false; } // For a pointer (possibly cast to integer), there are special rules. if (V.getKind() == APValue::LValue) return EvaluateBuiltinConstantPForLValue(V); // Otherwise, any constant value is good enough. return V.hasValue(); } // Anything else isn't considered to be sufficiently constant. return false; } /// Retrieves the "underlying object type" of the given expression, /// as used by __builtin_object_size. static QualType getObjectType(APValue::LValueBase B) { if (const ValueDecl *D = B.dyn_cast()) { if (const VarDecl *VD = dyn_cast(D)) return VD->getType(); } else if (const Expr *E = B.dyn_cast()) { if (isa(E)) return E->getType(); } else if (B.is()) { return B.getTypeInfoType(); } else if (B.is()) { return B.getDynamicAllocType(); } return QualType(); } /// A more selective version of E->IgnoreParenCasts for /// tryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only /// to change the type of E. /// Ex. For E = `(short*)((char*)(&foo))`, returns `&foo` /// /// Always returns an RValue with a pointer representation. static const Expr *ignorePointerCastsAndParens(const Expr *E) { assert(E->isRValue() && E->getType()->hasPointerRepresentation()); auto *NoParens = E->IgnoreParens(); auto *Cast = dyn_cast(NoParens); if (Cast == nullptr) return NoParens; // We only conservatively allow a few kinds of casts, because this code is // inherently a simple solution that seeks to support the common case. auto CastKind = Cast->getCastKind(); if (CastKind != CK_NoOp && CastKind != CK_BitCast && CastKind != CK_AddressSpaceConversion) return NoParens; auto *SubExpr = Cast->getSubExpr(); if (!SubExpr->getType()->hasPointerRepresentation() || !SubExpr->isRValue()) return NoParens; return ignorePointerCastsAndParens(SubExpr); } /// Checks to see if the given LValue's Designator is at the end of the LValue's /// record layout. e.g. /// struct { struct { int a, b; } fst, snd; } obj; /// obj.fst // no /// obj.snd // yes /// obj.fst.a // no /// obj.fst.b // no /// obj.snd.a // no /// obj.snd.b // yes /// /// Please note: this function is specialized for how __builtin_object_size /// views "objects". /// /// If this encounters an invalid RecordDecl or otherwise cannot determine the /// correct result, it will always return true. 
static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { assert(!LVal.Designator.Invalid); auto IsLastOrInvalidFieldDecl = [&Ctx](const FieldDecl *FD, bool &Invalid) { const RecordDecl *Parent = FD->getParent(); Invalid = Parent->isInvalidDecl(); if (Invalid || Parent->isUnion()) return true; const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Parent); return FD->getFieldIndex() + 1 == Layout.getFieldCount(); }; auto &Base = LVal.getLValueBase(); if (auto *ME = dyn_cast_or_null(Base.dyn_cast())) { if (auto *FD = dyn_cast(ME->getMemberDecl())) { bool Invalid; if (!IsLastOrInvalidFieldDecl(FD, Invalid)) return Invalid; } else if (auto *IFD = dyn_cast(ME->getMemberDecl())) { for (auto *FD : IFD->chain()) { bool Invalid; if (!IsLastOrInvalidFieldDecl(cast(FD), Invalid)) return Invalid; } } } unsigned I = 0; QualType BaseType = getType(Base); if (LVal.Designator.FirstEntryIsAnUnsizedArray) { // If we don't know the array bound, conservatively assume we're looking at // the final array element. ++I; if (BaseType->isIncompleteArrayType()) BaseType = Ctx.getAsArrayType(BaseType)->getElementType(); else BaseType = BaseType->castAs()->getPointeeType(); } for (unsigned E = LVal.Designator.Entries.size(); I != E; ++I) { const auto &Entry = LVal.Designator.Entries[I]; if (BaseType->isArrayType()) { // Because __builtin_object_size treats arrays as objects, we can ignore // the index iff this is the last array in the Designator. if (I + 1 == E) return true; const auto *CAT = cast(Ctx.getAsArrayType(BaseType)); uint64_t Index = Entry.getAsArrayIndex(); if (Index + 1 != CAT->getSize()) return false; BaseType = CAT->getElementType(); } else if (BaseType->isAnyComplexType()) { const auto *CT = BaseType->castAs(); uint64_t Index = Entry.getAsArrayIndex(); if (Index != 1) return false; BaseType = CT->getElementType(); } else if (auto *FD = getAsField(Entry)) { bool Invalid; if (!IsLastOrInvalidFieldDecl(FD, Invalid)) return Invalid; BaseType = FD->getType(); } else { assert(getAsBaseClass(Entry) && "Expecting cast to a base class"); return false; } } return true; } /// Tests to see if the LValue has a user-specified designator (that isn't /// necessarily valid). Note that this always returns 'true' if the LValue has /// an unsized array as its first designator entry, because there's currently no /// way to tell if the user typed *foo or foo[0]. static bool refersToCompleteObject(const LValue &LVal) { if (LVal.Designator.Invalid) return false; if (!LVal.Designator.Entries.empty()) return LVal.Designator.isMostDerivedAnUnsizedArray(); if (!LVal.InvalidBase) return true; // If `E` is a MemberExpr, then the first part of the designator is hiding in // the LValueBase. const auto *E = LVal.Base.dyn_cast(); return !E || !isa(E); } /// Attempts to detect a user writing into a piece of memory that's impossible /// to figure out the size of by just using types. static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) { const SubobjectDesignator &Designator = LVal.Designator; // Notes: // - Users can only write off of the end when we have an invalid base. Invalid // bases imply we don't know where the memory came from. // - We used to be a bit more aggressive here; we'd only be conservative if // the array at the end was flexible, or if it had 0 or 1 elements. This // broke some common standard library extensions (PR30346), but was // otherwise seemingly fine. It may be useful to reintroduce this behavior // with some sort of list. 
  //   OTOH, it seems that GCC is always conservative with the last element in
  //   structs (if it's an array), so our current behavior is more compatible
  //   than an explicit list approach would be.
  return LVal.InvalidBase &&
         Designator.Entries.size() == Designator.MostDerivedPathLength &&
         Designator.MostDerivedIsArrayElement &&
         isDesignatorAtObjectEnd(Ctx, LVal);
}

/// Converts the given APInt to CharUnits, assuming the APInt is unsigned.
/// Fails if the conversion would cause loss of precision.
static bool convertUnsignedAPIntToCharUnits(const llvm::APInt &Int,
                                            CharUnits &Result) {
  auto CharUnitsMax = std::numeric_limits<CharUnits::QuantityType>::max();
  if (Int.ugt(CharUnitsMax))
    return false;
  Result = CharUnits::fromQuantity(Int.getZExtValue());
  return true;
}

/// Helper for tryEvaluateBuiltinObjectSize -- Given an LValue, this will
/// determine how many bytes exist from the beginning of the object to either
/// the end of the current subobject, or the end of the object itself,
/// depending on what the LValue looks like + the value of Type.
///
/// If this returns false, the value of Result is undefined.
static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
                               unsigned Type, const LValue &LVal,
                               CharUnits &EndOffset) {
  bool DetermineForCompleteObject = refersToCompleteObject(LVal);

  auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
    if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
      return false;
    return HandleSizeof(Info, ExprLoc, Ty, Result);
  };

  // We want to evaluate the size of the entire object. This is a valid
  // fallback for when Type=1 and the designator is invalid, because we're
  // asked for an upper-bound.
  if (!(Type & 1) || LVal.Designator.Invalid || DetermineForCompleteObject) {
    // Type=3 wants a lower bound, so we can't fall back to this.
    if (Type == 3 && !DetermineForCompleteObject)
      return false;

    llvm::APInt APEndOffset;
    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);

    if (LVal.InvalidBase)
      return false;

    QualType BaseTy = getObjectType(LVal.getLValueBase());
    return CheckedHandleSizeof(BaseTy, EndOffset);
  }

  // We want to evaluate the size of a subobject.
  const SubobjectDesignator &Designator = LVal.Designator;

  // The following is a moderately common idiom in C:
  //
  // struct Foo { int a; char c[1]; };
  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
  // strcpy(&F->c[0], Bar);
  //
  // In order to not break too much legacy code, we need to support it.
  if (isUserWritingOffTheEnd(Info.Ctx, LVal)) {
    // If we can resolve this to an alloc_size call, we can hand that back,
    // because we know for certain how many bytes there are to write to.
    llvm::APInt APEndOffset;
    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);

    // If we cannot determine the size of the initial allocation, then we
    // can't give an accurate upper-bound. However, we are still able to give
    // conservative lower-bounds for Type=3.
    if (Type == 1)
      return false;
  }

  CharUnits BytesPerElem;
  if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem))
    return false;

  // According to the GCC documentation, we want the size of the subobject
  // denoted by the pointer. But that's not quite right -- what we actually
  // want is the size of the immediately-enclosing array, if there is one.
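  // Illustrative sketch (not from the original source), assuming 4-byte int:
  //
  //   struct S { int a[4]; int b; } s;
  //   __builtin_object_size(&s.a[1], 1)  // 12: remainder of the enclosing
  //                                      // array, not sizeof(int)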
int64_t ElemsRemaining; if (Designator.MostDerivedIsArrayElement && Designator.Entries.size() == Designator.MostDerivedPathLength) { uint64_t ArraySize = Designator.getMostDerivedArraySize(); uint64_t ArrayIndex = Designator.Entries.back().getAsArrayIndex(); ElemsRemaining = ArraySize <= ArrayIndex ? 0 : ArraySize - ArrayIndex; } else { ElemsRemaining = Designator.isOnePastTheEnd() ? 0 : 1; } EndOffset = LVal.getLValueOffset() + BytesPerElem * ElemsRemaining; return true; } /// Tries to evaluate the __builtin_object_size for @p E. If successful, /// returns true and stores the result in @p Size. /// /// If @p WasError is non-null, this will report whether the failure to evaluate /// is to be treated as an Error in IntExprEvaluator. static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type, EvalInfo &Info, uint64_t &Size) { // Determine the denoted object. LValue LVal; { // The operand of __builtin_object_size is never evaluated for side-effects. // If there are any, but we can determine the pointed-to object anyway, then // ignore the side-effects. SpeculativeEvaluationRAII SpeculativeEval(Info); IgnoreSideEffectsRAII Fold(Info); if (E->isGLValue()) { // It's possible for us to be given GLValues if we're called via // Expr::tryEvaluateObjectSize. APValue RVal; if (!EvaluateAsRValue(Info, E, RVal)) return false; LVal.setFrom(Info.Ctx, RVal); } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info, /*InvalidBaseOK=*/true)) return false; } // If we point to before the start of the object, there are no accessible // bytes. if (LVal.getLValueOffset().isNegative()) { Size = 0; return true; } CharUnits EndOffset; if (!determineEndOffset(Info, E->getExprLoc(), Type, LVal, EndOffset)) return false; // If we've fallen outside of the end offset, just pretend there's nothing to // write to/read from. if (EndOffset <= LVal.getLValueOffset()) Size = 0; else Size = (EndOffset - LVal.getLValueOffset()).getQuantity(); return true; } bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { if (unsigned BuiltinOp = E->getBuiltinCallee()) return VisitBuiltinCallExpr(E, BuiltinOp); return ExprEvaluatorBaseTy::VisitCallExpr(E); } static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, APValue &Val, APSInt &Alignment) { QualType SrcTy = E->getArg(0)->getType(); if (!getAlignmentArgument(E->getArg(1), SrcTy, Info, Alignment)) return false; // Even though we are evaluating integer expressions we could get a pointer // argument for the __builtin_is_aligned() case. if (SrcTy->isPointerType()) { LValue Ptr; if (!EvaluatePointer(E->getArg(0), Ptr, Info)) return false; Ptr.moveInto(Val); } else if (!SrcTy->isIntegralOrEnumerationType()) { Info.FFDiag(E->getArg(0)); return false; } else { APSInt SrcInt; if (!EvaluateInteger(E->getArg(0), SrcInt, Info)) return false; assert(SrcInt.getBitWidth() >= Alignment.getBitWidth() && "Bit widths must be the same"); Val = APValue(SrcInt); } assert(Val.hasValue()); return true; } bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { switch (BuiltinOp) { default: return ExprEvaluatorBaseTy::VisitCallExpr(E); case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { // The type was checked when we built the expression. 
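    // Illustrative sketch (not from the original source):
    //
    //   char buf[16];
    //   __builtin_object_size(buf + 4, 0)  // 12
    //   __builtin_object_size(p, 0)        // (size_t)-1 if p is unknown
    //   __builtin_object_size(p, 2)        // 0 if p is unknown
    //
    // where p is a hypothetical pointer whose pointee cannot be determined;
    // bit 1 of Type selects the 0-on-failure lower bound used below.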
unsigned Type = E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue(); assert(Type <= 3 && "unexpected type"); uint64_t Size; if (tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size)) return Success(Size, E); if (E->getArg(0)->HasSideEffects(Info.Ctx)) return Success((Type & 2) ? 0 : -1, E); // Expression had no side effects, but we couldn't statically determine the // size of the referenced object. switch (Info.EvalMode) { case EvalInfo::EM_ConstantExpression: case EvalInfo::EM_ConstantFold: case EvalInfo::EM_IgnoreSideEffects: // Leave it to IR generation. return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: // Reduce it to a constant now. return Success((Type & 2) ? 0 : -1, E); } llvm_unreachable("unexpected EvalMode"); } case Builtin::BI__builtin_os_log_format_buffer_size: { analyze_os_log::OSLogBufferLayout Layout; analyze_os_log::computeOSLogBufferLayout(Info.Ctx, E, Layout); return Success(Layout.size().getQuantity(), E); } case Builtin::BI__builtin_is_aligned: { APValue Src; APSInt Alignment; if (!getBuiltinAlignArguments(E, Info, Src, Alignment)) return false; if (Src.isLValue()) { // If we evaluated a pointer, check the minimum known alignment. LValue Ptr; Ptr.setFrom(Info.Ctx, Src); CharUnits BaseAlignment = getBaseAlignment(Info, Ptr); CharUnits PtrAlign = BaseAlignment.alignmentAtOffset(Ptr.Offset); // We can return true if the known alignment at the computed offset is // greater than the requested alignment. assert(PtrAlign.isPowerOfTwo()); assert(Alignment.isPowerOf2()); if (PtrAlign.getQuantity() >= Alignment) return Success(1, E); // If the alignment is not known to be sufficient, some cases could still // be aligned at run time. However, if the requested alignment is less or // equal to the base alignment and the offset is not aligned, we know that // the run-time value can never be aligned. if (BaseAlignment.getQuantity() >= Alignment && PtrAlign.getQuantity() < Alignment) return Success(0, E); // Otherwise we can't infer whether the value is sufficiently aligned. // TODO: __builtin_is_aligned(__builtin_align_{down,up{(expr, N), N) // in cases where we can't fully evaluate the pointer. Info.FFDiag(E->getArg(0), diag::note_constexpr_alignment_compute) << Alignment; return false; } assert(Src.isInt()); return Success((Src.getInt() & (Alignment - 1)) == 0 ? 
1 : 0, E); } case Builtin::BI__builtin_align_up: { APValue Src; APSInt Alignment; if (!getBuiltinAlignArguments(E, Info, Src, Alignment)) return false; if (!Src.isInt()) return Error(E); APSInt AlignedVal = APSInt((Src.getInt() + (Alignment - 1)) & ~(Alignment - 1), Src.getInt().isUnsigned()); assert(AlignedVal.getBitWidth() == Src.getInt().getBitWidth()); return Success(AlignedVal, E); } case Builtin::BI__builtin_align_down: { APValue Src; APSInt Alignment; if (!getBuiltinAlignArguments(E, Info, Src, Alignment)) return false; if (!Src.isInt()) return Error(E); APSInt AlignedVal = APSInt(Src.getInt() & ~(Alignment - 1), Src.getInt().isUnsigned()); assert(AlignedVal.getBitWidth() == Src.getInt().getBitWidth()); return Success(AlignedVal, E); } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.byteSwap(), E); } case Builtin::BI__builtin_classify_type: return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E); case Builtin::BI__builtin_clrsb: case Builtin::BI__builtin_clrsbl: case Builtin::BI__builtin_clrsbll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.getBitWidth() - Val.getMinSignedBits(), E); } case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: case Builtin::BI__builtin_clzs: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; if (!Val) return Error(E); return Success(Val.countLeadingZeros(), E); } case Builtin::BI__builtin_constant_p: { const Expr *Arg = E->getArg(0); if (EvaluateBuiltinConstantP(Info, Arg)) return Success(true, E); if (Info.InConstantContext || Arg->HasSideEffects(Info.Ctx)) { // Outside a constant context, eagerly evaluate to false in the presence // of side-effects in order to avoid -Wunsequenced false-positives in // a branch on __builtin_constant_p(expr). return Success(false, E); } Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } case Builtin::BI__builtin_is_constant_evaluated: { const auto *Callee = Info.CurrentCall->getCallee(); if (Info.InConstantContext && !Info.CheckingPotentialConstantExpression && (Info.CallStackDepth == 1 || (Info.CallStackDepth == 2 && Callee->isInStdNamespace() && Callee->getIdentifier() && Callee->getIdentifier()->isStr("is_constant_evaluated")))) { // FIXME: Find a better way to avoid duplicated diagnostics. if (Info.EvalStatus.Diag) Info.report((Info.CallStackDepth == 1) ? E->getExprLoc() : Info.CurrentCall->CallLoc, diag::warn_is_constant_evaluated_always_true_constexpr) << (Info.CallStackDepth == 1 ? 
"__builtin_is_constant_evaluated" : "std::is_constant_evaluated"); } return Success(Info.InConstantContext, E); } case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: case Builtin::BI__builtin_ctzs: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; if (!Val) return Error(E); return Success(Val.countTrailingZeros(), E); } case Builtin::BI__builtin_eh_return_data_regno: { int Operand = E->getArg(0)->EvaluateKnownConstInt(Info.Ctx).getZExtValue(); Operand = Info.Ctx.getTargetInfo().getEHDataRegisterNumber(Operand); return Success(Operand, E); } case Builtin::BI__builtin_expect: case Builtin::BI__builtin_expect_with_probability: return Visit(E->getArg(0)); case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: case Builtin::BI__builtin_ffsll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; unsigned N = Val.countTrailingZeros(); return Success(N == Val.getBitWidth() ? 0 : N + 1, E); } case Builtin::BI__builtin_fpclassify: { APFloat Val(0.0); if (!EvaluateFloat(E->getArg(5), Val, Info)) return false; unsigned Arg; switch (Val.getCategory()) { case APFloat::fcNaN: Arg = 0; break; case APFloat::fcInfinity: Arg = 1; break; case APFloat::fcNormal: Arg = Val.isDenormal() ? 3 : 2; break; case APFloat::fcZero: Arg = 4; break; } return Visit(E->getArg(Arg)); } case Builtin::BI__builtin_isinf_sign: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isInfinity() ? (Val.isNegative() ? -1 : 1) : 0, E); } case Builtin::BI__builtin_isinf: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isInfinity() ? 1 : 0, E); } case Builtin::BI__builtin_isfinite: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isFinite() ? 1 : 0, E); } case Builtin::BI__builtin_isnan: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isNaN() ? 1 : 0, E); } case Builtin::BI__builtin_isnormal: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isNormal() ? 1 : 0, E); } case Builtin::BI__builtin_parity: case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.countPopulation() % 2, E); } case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.countPopulation(), E); } case Builtin::BIstrlen: case Builtin::BIwcslen: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); LLVM_FALLTHROUGH; case Builtin::BI__builtin_strlen: case Builtin::BI__builtin_wcslen: { // As an extension, we support __builtin_strlen() as a constant expression, // and support folding strlen() to a constant. LValue String; if (!EvaluatePointer(E->getArg(0), String, Info)) return false; QualType CharTy = E->getArg(0)->getType()->getPointeeType(); // Fast path: if it's a string literal, search the string value. if (const StringLiteral *S = dyn_cast_or_null( String.getLValueBase().dyn_cast())) { // The string literal may have embedded null characters. Find the first // one and truncate there. 
StringRef Str = S->getBytes(); int64_t Off = String.Offset.getQuantity(); if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() && S->getCharByteWidth() == 1 && // FIXME: Add fast-path for wchar_t too. Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { Str = Str.substr(Off); StringRef::size_type Pos = Str.find(0); if (Pos != StringRef::npos) Str = Str.substr(0, Pos); return Success(Str.size(), E); } // Fall through to slow path to issue appropriate diagnostic. } // Slow path: scan the bytes of the string looking for the terminating 0. for (uint64_t Strlen = 0; /**/; ++Strlen) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || !Char.isInt()) return false; if (!Char.getInt()) return Success(Strlen, E); if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) return false; } } case Builtin::BIstrcmp: case Builtin::BIwcscmp: case Builtin::BIstrncmp: case Builtin::BIwcsncmp: case Builtin::BImemcmp: case Builtin::BIbcmp: case Builtin::BIwmemcmp: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); LLVM_FALLTHROUGH; case Builtin::BI__builtin_strcmp: case Builtin::BI__builtin_wcscmp: case Builtin::BI__builtin_strncmp: case Builtin::BI__builtin_wcsncmp: case Builtin::BI__builtin_memcmp: case Builtin::BI__builtin_bcmp: case Builtin::BI__builtin_wmemcmp: { LValue String1, String2; if (!EvaluatePointer(E->getArg(0), String1, Info) || !EvaluatePointer(E->getArg(1), String2, Info)) return false; uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrcmp && BuiltinOp != Builtin::BIwcscmp && BuiltinOp != Builtin::BI__builtin_strcmp && BuiltinOp != Builtin::BI__builtin_wcscmp) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } // Empty substrings compare equal by definition. if (MaxLength == 0u) return Success(0, E); if (!String1.checkNullPointerForFoldAccess(Info, E, AK_Read) || !String2.checkNullPointerForFoldAccess(Info, E, AK_Read) || String1.Designator.Invalid || String2.Designator.Invalid) return false; QualType CharTy1 = String1.Designator.getType(Info.Ctx); QualType CharTy2 = String2.Designator.getType(Info.Ctx); bool IsRawByte = BuiltinOp == Builtin::BImemcmp || BuiltinOp == Builtin::BIbcmp || BuiltinOp == Builtin::BI__builtin_memcmp || BuiltinOp == Builtin::BI__builtin_bcmp; assert(IsRawByte || (Info.Ctx.hasSameUnqualifiedType( CharTy1, E->getArg(0)->getType()->getPointeeType()) && Info.Ctx.hasSameUnqualifiedType(CharTy1, CharTy2))); // For memcmp, allow comparing any arrays of '[[un]signed] char' or // 'char8_t', but no other types. if (IsRawByte && !(isOneByteCharacterType(CharTy1) && isOneByteCharacterType(CharTy2))) { // FIXME: Consider using our bit_cast implementation to support this. 
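// Editorial note (illustrative, not part of upstream): the raw-byte comparison
// is restricted to one-byte character element types, so e.g.
//   constexpr char a[] = "ab", b[] = "ab";
//   constexpr int r = __builtin_memcmp(a, b, 2);   // folds to 0
// is accepted, while __builtin_memcmp over, say, int arrays is diagnosed here.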
Info.FFDiag(E, diag::note_constexpr_memcmp_unsupported) << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'") << CharTy1 << CharTy2; return false; } const auto &ReadCurElems = [&](APValue &Char1, APValue &Char2) { return handleLValueToRValueConversion(Info, E, CharTy1, String1, Char1) && handleLValueToRValueConversion(Info, E, CharTy2, String2, Char2) && Char1.isInt() && Char2.isInt(); }; const auto &AdvanceElems = [&] { return HandleLValueArrayAdjustment(Info, E, String1, CharTy1, 1) && HandleLValueArrayAdjustment(Info, E, String2, CharTy2, 1); }; bool StopAtNull = (BuiltinOp != Builtin::BImemcmp && BuiltinOp != Builtin::BIbcmp && BuiltinOp != Builtin::BIwmemcmp && BuiltinOp != Builtin::BI__builtin_memcmp && BuiltinOp != Builtin::BI__builtin_bcmp && BuiltinOp != Builtin::BI__builtin_wmemcmp); bool IsWide = BuiltinOp == Builtin::BIwcscmp || BuiltinOp == Builtin::BIwcsncmp || BuiltinOp == Builtin::BIwmemcmp || BuiltinOp == Builtin::BI__builtin_wcscmp || BuiltinOp == Builtin::BI__builtin_wcsncmp || BuiltinOp == Builtin::BI__builtin_wmemcmp; for (; MaxLength; --MaxLength) { APValue Char1, Char2; if (!ReadCurElems(Char1, Char2)) return false; if (Char1.getInt().ne(Char2.getInt())) { if (IsWide) // wmemcmp compares with wchar_t signedness. return Success(Char1.getInt() < Char2.getInt() ? -1 : 1, E); // memcmp always compares unsigned chars. return Success(Char1.getInt().ult(Char2.getInt()) ? -1 : 1, E); } if (StopAtNull && !Char1.getInt()) return Success(0, E); assert(!(StopAtNull && !Char2.getInt())); if (!AdvanceElems()) return false; } // We hit the strncmp / memcmp limit. return Success(0, E); } case Builtin::BI__atomic_always_lock_free: case Builtin::BI__atomic_is_lock_free: case Builtin::BI__c11_atomic_is_lock_free: { APSInt SizeVal; if (!EvaluateInteger(E->getArg(0), SizeVal, Info)) return false; // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power // of two less than the maximum inline atomic width, we know it is // lock-free. If the size isn't a power of two, or greater than the // maximum alignment where we promote atomics, we know it is not lock-free // (at least not in the sense of atomic_is_lock_free). Otherwise, // the answer can only be determined at runtime; for example, 16-byte // atomics have lock-free implementations on some, but not all, // x86-64 processors. // Check power-of-two. CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue()); if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = Info.Ctx.getTargetInfo().getMaxAtomicInlineWidth(); if (Size <= Info.Ctx.toCharUnitsFromBits(InlineWidthBits)) { if (BuiltinOp == Builtin::BI__c11_atomic_is_lock_free || Size == CharUnits::One() || E->getArg(1)->isNullPointerConstant(Info.Ctx, Expr::NPC_NeverValueDependent)) // OK, we will inline appropriately-aligned operations of this size, // and _Atomic(T) is appropriately-aligned. return Success(1, E); QualType PointeeType = E->getArg(1)->IgnoreImpCasts()->getType()-> castAs()->getPointeeType(); if (!PointeeType->isIncompleteType() && Info.Ctx.getTypeAlignInChars(PointeeType) >= Size) { // OK, we will inline operations on this object. return Success(1, E); } } } // Avoid emiting call for runtime decision on PowerPC 32-bit // The lock free possibilities on this platform are covered by the lines // above and we know in advance other cases require lock if (Info.Ctx.getTargetInfo().getTriple().getArch() == llvm::Triple::ppc) { return Success(0, E); } return BuiltinOp == Builtin::BI__atomic_always_lock_free ? 
Success(0, E) : Error(E); } case Builtin::BIomp_is_initial_device: // We can decide statically which value the runtime would return if called. return Success(Info.getLangOpts().OpenMPIsDevice ? 0 : 1, E); case Builtin::BI__builtin_add_overflow: case Builtin::BI__builtin_sub_overflow: case Builtin::BI__builtin_mul_overflow: case Builtin::BI__builtin_sadd_overflow: case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: case Builtin::BI__builtin_usub_overflow: case Builtin::BI__builtin_usubl_overflow: case Builtin::BI__builtin_usubll_overflow: case Builtin::BI__builtin_umul_overflow: case Builtin::BI__builtin_umull_overflow: case Builtin::BI__builtin_umulll_overflow: case Builtin::BI__builtin_saddl_overflow: case Builtin::BI__builtin_saddll_overflow: case Builtin::BI__builtin_ssub_overflow: case Builtin::BI__builtin_ssubl_overflow: case Builtin::BI__builtin_ssubll_overflow: case Builtin::BI__builtin_smul_overflow: case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: { LValue ResultLValue; APSInt LHS, RHS; QualType ResultType = E->getArg(2)->getType()->getPointeeType(); if (!EvaluateInteger(E->getArg(0), LHS, Info) || !EvaluateInteger(E->getArg(1), RHS, Info) || !EvaluatePointer(E->getArg(2), ResultLValue, Info)) return false; APSInt Result; bool DidOverflow = false; // If the types don't have to match, enlarge all 3 to the largest of them. if (BuiltinOp == Builtin::BI__builtin_add_overflow || BuiltinOp == Builtin::BI__builtin_sub_overflow || BuiltinOp == Builtin::BI__builtin_mul_overflow) { bool IsSigned = LHS.isSigned() || RHS.isSigned() || ResultType->isSignedIntegerOrEnumerationType(); bool AllSigned = LHS.isSigned() && RHS.isSigned() && ResultType->isSignedIntegerOrEnumerationType(); uint64_t LHSSize = LHS.getBitWidth(); uint64_t RHSSize = RHS.getBitWidth(); uint64_t ResultSize = Info.Ctx.getTypeSize(ResultType); uint64_t MaxBits = std::max(std::max(LHSSize, RHSSize), ResultSize); // Add an additional bit if the signedness isn't uniformly agreed to. We // could do this ONLY if there is a signed and an unsigned that both have // MaxBits, but the code to check that is pretty nasty. The issue will be // caught in the shrink-to-result later anyway. if (IsSigned && !AllSigned) ++MaxBits; LHS = APSInt(LHS.extOrTrunc(MaxBits), !IsSigned); RHS = APSInt(RHS.extOrTrunc(MaxBits), !IsSigned); Result = APSInt(MaxBits, !IsSigned); } // Find largest int. switch (BuiltinOp) { default: llvm_unreachable("Invalid value for BuiltinOp"); case Builtin::BI__builtin_add_overflow: case Builtin::BI__builtin_sadd_overflow: case Builtin::BI__builtin_saddl_overflow: case Builtin::BI__builtin_saddll_overflow: case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: Result = LHS.isSigned() ? LHS.sadd_ov(RHS, DidOverflow) : LHS.uadd_ov(RHS, DidOverflow); break; case Builtin::BI__builtin_sub_overflow: case Builtin::BI__builtin_ssub_overflow: case Builtin::BI__builtin_ssubl_overflow: case Builtin::BI__builtin_ssubll_overflow: case Builtin::BI__builtin_usub_overflow: case Builtin::BI__builtin_usubl_overflow: case Builtin::BI__builtin_usubll_overflow: Result = LHS.isSigned() ? 
LHS.ssub_ov(RHS, DidOverflow) : LHS.usub_ov(RHS, DidOverflow); break; case Builtin::BI__builtin_mul_overflow: case Builtin::BI__builtin_smul_overflow: case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: case Builtin::BI__builtin_umul_overflow: case Builtin::BI__builtin_umull_overflow: case Builtin::BI__builtin_umulll_overflow: Result = LHS.isSigned() ? LHS.smul_ov(RHS, DidOverflow) : LHS.umul_ov(RHS, DidOverflow); break; } // In the case where multiple sizes are allowed, truncate and see if // the values are the same. if (BuiltinOp == Builtin::BI__builtin_add_overflow || BuiltinOp == Builtin::BI__builtin_sub_overflow || BuiltinOp == Builtin::BI__builtin_mul_overflow) { // APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead, // since it will give us the behavior of a TruncOrSelf in the case where // its parameter <= its size. We previously set Result to be at least the // type-size of the result, so getTypeSize(ResultType) <= Result.BitWidth // will work exactly like TruncOrSelf. APSInt Temp = Result.extOrTrunc(Info.Ctx.getTypeSize(ResultType)); Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType()); if (!APSInt::isSameValue(Temp, Result)) DidOverflow = true; Result = Temp; } APValue APV{Result}; if (!handleAssignment(Info, E, ResultLValue, ResultType, APV)) return false; return Success(DidOverflow, E); } } } /// Determine whether this is a pointer past the end of the complete /// object referred to by the lvalue. static bool isOnePastTheEndOfCompleteObject(const ASTContext &Ctx, const LValue &LV) { // A null pointer can be viewed as being "past the end" but we don't // choose to look at it that way here. if (!LV.getLValueBase()) return false; // If the designator is valid and refers to a subobject, we're not pointing // past the end. if (!LV.getLValueDesignator().Invalid && !LV.getLValueDesignator().isOnePastTheEnd()) return false; // A pointer to an incomplete type might be past-the-end if the type's size is // zero. We cannot tell because the type is incomplete. QualType Ty = getType(LV.getLValueBase()); if (Ty->isIncompleteType()) return true; // We're a past-the-end pointer if we point to the byte after the object, // no matter what our type or path is. auto Size = Ctx.getTypeSizeInChars(Ty); return LV.getLValueOffset() == Size; } namespace { /// Data recursive integer evaluator of certain binary operators. /// /// We use a data recursive algorithm for binary operators so that we are able /// to handle extreme cases of chained binary operators without causing stack /// overflow. class DataRecursiveIntBinOpEvaluator { struct EvalResult { APValue Val; bool Failed; EvalResult() : Failed(false) { } void swap(EvalResult &RHS) { Val.swap(RHS.Val); Failed = RHS.Failed; RHS.Failed = false; } }; struct Job { const Expr *E; EvalResult LHSResult; // meaningful only for binary operator expression. enum { AnyExprKind, BinOpKind, BinOpVisitedLHSKind } Kind; Job() = default; Job(Job &&) = default; void startSpeculativeEval(EvalInfo &Info) { SpecEvalRAII = SpeculativeEvaluationRAII(Info); } private: SpeculativeEvaluationRAII SpecEvalRAII; }; SmallVector Queue; IntExprEvaluator &IntEval; EvalInfo &Info; APValue &FinalResult; public: DataRecursiveIntBinOpEvaluator(IntExprEvaluator &IntEval, APValue &Result) : IntEval(IntEval), Info(IntEval.getEvalInfo()), FinalResult(Result) { } /// True if \param E is a binary operator that we are going to handle /// data recursively. 
/// We handle binary operators that are comma, logical, or that have operands /// with integral or enumeration type. static bool shouldEnqueue(const BinaryOperator *E) { return E->getOpcode() == BO_Comma || E->isLogicalOp() || (E->isRValue() && E->getType()->isIntegralOrEnumerationType() && E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); } bool Traverse(const BinaryOperator *E) { enqueue(E); EvalResult PrevResult; while (!Queue.empty()) process(PrevResult); if (PrevResult.Failed) return false; FinalResult.swap(PrevResult.Val); return true; } private: bool Success(uint64_t Value, const Expr *E, APValue &Result) { return IntEval.Success(Value, E, Result); } bool Success(const APSInt &Value, const Expr *E, APValue &Result) { return IntEval.Success(Value, E, Result); } bool Error(const Expr *E) { return IntEval.Error(E); } bool Error(const Expr *E, diag::kind D) { return IntEval.Error(E, D); } OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) { return Info.CCEDiag(E, D); } // Returns true if visiting the RHS is necessary, false otherwise. bool VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E, bool &SuppressRHSDiags); bool VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult, const BinaryOperator *E, APValue &Result); void EvaluateExpr(const Expr *E, EvalResult &Result) { Result.Failed = !Evaluate(Result.Val, Info, E); if (Result.Failed) Result.Val = APValue(); } void process(EvalResult &Result); void enqueue(const Expr *E) { E = E->IgnoreParens(); Queue.resize(Queue.size()+1); Queue.back().E = E; Queue.back().Kind = Job::AnyExprKind; } }; } bool DataRecursiveIntBinOpEvaluator:: VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E, bool &SuppressRHSDiags) { if (E->getOpcode() == BO_Comma) { // Ignore LHS but note if we could not evaluate it. if (LHSResult.Failed) return Info.noteSideEffect(); return true; } if (E->isLogicalOp()) { bool LHSAsBool; if (!LHSResult.Failed && HandleConversionToBool(LHSResult.Val, LHSAsBool)) { // We were able to evaluate the LHS, see if we can get away with not // evaluating the RHS: 0 && X -> 0, 1 || X -> 1 if (LHSAsBool == (E->getOpcode() == BO_LOr)) { Success(LHSAsBool, E, LHSResult.Val); return false; // Ignore RHS } } else { LHSResult.Failed = true; // Since we weren't able to evaluate the left hand side, it // might have had side effects. if (!Info.noteSideEffect()) return false; // We can't evaluate the LHS; however, sometimes the result // is determined by the RHS: X && 0 -> 0, X || 1 -> 1. // Don't ignore RHS and suppress diagnostics from this arm. SuppressRHSDiags = true; } return true; } assert(E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); if (LHSResult.Failed && !Info.noteFailure()) return false; // Ignore RHS; return true; } static void addOrSubLValueAsInteger(APValue &LVal, const APSInt &Index, bool IsSub) { // Compute the new offset in the appropriate width, wrapping at 64 bits. // FIXME: When compiling for a 32-bit target, we should use 32-bit // offsets. assert(!LVal.hasLValuePath() && "have designator for integer lvalue"); CharUnits &Offset = LVal.getLValueOffset(); uint64_t Offset64 = Offset.getQuantity(); uint64_t Index64 = Index.extOrTrunc(64).getZExtValue(); Offset = CharUnits::fromQuantity(IsSub ? 
Offset64 - Index64 : Offset64 + Index64); } bool DataRecursiveIntBinOpEvaluator:: VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult, const BinaryOperator *E, APValue &Result) { if (E->getOpcode() == BO_Comma) { if (RHSResult.Failed) return false; Result = RHSResult.Val; return true; } if (E->isLogicalOp()) { bool lhsResult, rhsResult; bool LHSIsOK = HandleConversionToBool(LHSResult.Val, lhsResult); bool RHSIsOK = HandleConversionToBool(RHSResult.Val, rhsResult); if (LHSIsOK) { if (RHSIsOK) { if (E->getOpcode() == BO_LOr) return Success(lhsResult || rhsResult, E, Result); else return Success(lhsResult && rhsResult, E, Result); } } else { if (RHSIsOK) { // We can't evaluate the LHS; however, sometimes the result // is determined by the RHS: X && 0 -> 0, X || 1 -> 1. if (rhsResult == (E->getOpcode() == BO_LOr)) return Success(rhsResult, E, Result); } } return false; } assert(E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); if (LHSResult.Failed || RHSResult.Failed) return false; const APValue &LHSVal = LHSResult.Val; const APValue &RHSVal = RHSResult.Val; // Handle cases like (unsigned long)&a + 4. if (E->isAdditiveOp() && LHSVal.isLValue() && RHSVal.isInt()) { Result = LHSVal; addOrSubLValueAsInteger(Result, RHSVal.getInt(), E->getOpcode() == BO_Sub); return true; } // Handle cases like 4 + (unsigned long)&a if (E->getOpcode() == BO_Add && RHSVal.isLValue() && LHSVal.isInt()) { Result = RHSVal; addOrSubLValueAsInteger(Result, LHSVal.getInt(), /*IsSub*/false); return true; } if (E->getOpcode() == BO_Sub && LHSVal.isLValue() && RHSVal.isLValue()) { // Handle (intptr_t)&&A - (intptr_t)&&B. if (!LHSVal.getLValueOffset().isZero() || !RHSVal.getLValueOffset().isZero()) return false; const Expr *LHSExpr = LHSVal.getLValueBase().dyn_cast(); const Expr *RHSExpr = RHSVal.getLValueBase().dyn_cast(); if (!LHSExpr || !RHSExpr) return false; const AddrLabelExpr *LHSAddrExpr = dyn_cast(LHSExpr); const AddrLabelExpr *RHSAddrExpr = dyn_cast(RHSExpr); if (!LHSAddrExpr || !RHSAddrExpr) return false; // Make sure both labels come from the same function. if (LHSAddrExpr->getLabel()->getDeclContext() != RHSAddrExpr->getLabel()->getDeclContext()) return false; Result = APValue(LHSAddrExpr, RHSAddrExpr); return true; } // All the remaining cases expect both operands to be an integer if (!LHSVal.isInt() || !RHSVal.isInt()) return Error(E); // Set up the width and signedness manually, in case it can't be deduced // from the operation we're performing. // FIXME: Don't do this in the cases where we can deduce it. 
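// Editorial note (illustrative, not part of upstream): this is the generic
// integral fold, covering ordinary arithmetic such as
//   enum { Mask = (0xff << 8) | 0x0f };   // folded via handleIntIntBinOp
// with the result's width and signedness taken from the expression's type.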
APSInt Value(Info.Ctx.getIntWidth(E->getType()), E->getType()->isUnsignedIntegerOrEnumerationType()); if (!handleIntIntBinOp(Info, E, LHSVal.getInt(), E->getOpcode(), RHSVal.getInt(), Value)) return false; return Success(Value, E, Result); } void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { Job &job = Queue.back(); switch (job.Kind) { case Job::AnyExprKind: { if (const BinaryOperator *Bop = dyn_cast(job.E)) { if (shouldEnqueue(Bop)) { job.Kind = Job::BinOpKind; enqueue(Bop->getLHS()); return; } } EvaluateExpr(job.E, Result); Queue.pop_back(); return; } case Job::BinOpKind: { const BinaryOperator *Bop = cast(job.E); bool SuppressRHSDiags = false; if (!VisitBinOpLHSOnly(Result, Bop, SuppressRHSDiags)) { Queue.pop_back(); return; } if (SuppressRHSDiags) job.startSpeculativeEval(Info); job.LHSResult.swap(Result); job.Kind = Job::BinOpVisitedLHSKind; enqueue(Bop->getRHS()); return; } case Job::BinOpVisitedLHSKind: { const BinaryOperator *Bop = cast(job.E); EvalResult RHS; RHS.swap(Result); Result.Failed = !VisitBinOp(job.LHSResult, RHS, Bop, Result.Val); Queue.pop_back(); return; } } llvm_unreachable("Invalid Job::Kind!"); } namespace { /// Used when we determine that we should fail, but can keep evaluating prior to /// noting that we had a failure. class DelayedNoteFailureRAII { EvalInfo &Info; bool NoteFailure; public: DelayedNoteFailureRAII(EvalInfo &Info, bool NoteFailure = true) : Info(Info), NoteFailure(NoteFailure) {} ~DelayedNoteFailureRAII() { if (NoteFailure) { bool ContinueAfterFailure = Info.noteFailure(); (void)ContinueAfterFailure; assert(ContinueAfterFailure && "Shouldn't have kept evaluating on failure."); } } }; enum class CmpResult { Unequal, Less, Equal, Greater, Unordered, }; } template static bool EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E, SuccessCB &&Success, AfterCB &&DoAfter) { assert(E->isComparisonOp() && "expected comparison operator"); assert((E->getOpcode() == BO_Cmp || E->getType()->isIntegralOrEnumerationType()) && "unsupported binary expression evaluation"); auto Error = [&](const Expr *E) { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; }; bool IsRelational = E->isRelationalOp() || E->getOpcode() == BO_Cmp; bool IsEquality = E->isEqualityOp(); QualType LHSTy = E->getLHS()->getType(); QualType RHSTy = E->getRHS()->getType(); if (LHSTy->isIntegralOrEnumerationType() && RHSTy->isIntegralOrEnumerationType()) { APSInt LHS, RHS; bool LHSOK = EvaluateInteger(E->getLHS(), LHS, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateInteger(E->getRHS(), RHS, Info) || !LHSOK) return false; if (LHS < RHS) return Success(CmpResult::Less, E); if (LHS > RHS) return Success(CmpResult::Greater, E); return Success(CmpResult::Equal, E); } if (LHSTy->isFixedPointType() || RHSTy->isFixedPointType()) { APFixedPoint LHSFX(Info.Ctx.getFixedPointSemantics(LHSTy)); APFixedPoint RHSFX(Info.Ctx.getFixedPointSemantics(RHSTy)); bool LHSOK = EvaluateFixedPointOrInteger(E->getLHS(), LHSFX, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateFixedPointOrInteger(E->getRHS(), RHSFX, Info) || !LHSOK) return false; if (LHSFX < RHSFX) return Success(CmpResult::Less, E); if (LHSFX > RHSFX) return Success(CmpResult::Greater, E); return Success(CmpResult::Equal, E); } if (LHSTy->isAnyComplexType() || RHSTy->isAnyComplexType()) { ComplexValue LHS, RHS; bool LHSOK; if (E->isAssignmentOp()) { LValue LV; EvaluateLValue(E->getLHS(), LV, Info); LHSOK = false; } else if (LHSTy->isRealFloatingType()) { LHSOK 
= EvaluateFloat(E->getLHS(), LHS.FloatReal, Info); if (LHSOK) { LHS.makeComplexFloat(); LHS.FloatImag = APFloat(LHS.FloatReal.getSemantics()); } } else { LHSOK = EvaluateComplex(E->getLHS(), LHS, Info); } if (!LHSOK && !Info.noteFailure()) return false; if (E->getRHS()->getType()->isRealFloatingType()) { if (!EvaluateFloat(E->getRHS(), RHS.FloatReal, Info) || !LHSOK) return false; RHS.makeComplexFloat(); RHS.FloatImag = APFloat(RHS.FloatReal.getSemantics()); } else if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK) return false; if (LHS.isComplexFloat()) { APFloat::cmpResult CR_r = LHS.getComplexFloatReal().compare(RHS.getComplexFloatReal()); APFloat::cmpResult CR_i = LHS.getComplexFloatImag().compare(RHS.getComplexFloatImag()); bool IsEqual = CR_r == APFloat::cmpEqual && CR_i == APFloat::cmpEqual; return Success(IsEqual ? CmpResult::Equal : CmpResult::Unequal, E); } else { assert(IsEquality && "invalid complex comparison"); bool IsEqual = LHS.getComplexIntReal() == RHS.getComplexIntReal() && LHS.getComplexIntImag() == RHS.getComplexIntImag(); return Success(IsEqual ? CmpResult::Equal : CmpResult::Unequal, E); } } if (LHSTy->isRealFloatingType() && RHSTy->isRealFloatingType()) { APFloat RHS(0.0), LHS(0.0); bool LHSOK = EvaluateFloat(E->getRHS(), RHS, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateFloat(E->getLHS(), LHS, Info) || !LHSOK) return false; assert(E->isComparisonOp() && "Invalid binary operator!"); auto GetCmpRes = [&]() { switch (LHS.compare(RHS)) { case APFloat::cmpEqual: return CmpResult::Equal; case APFloat::cmpLessThan: return CmpResult::Less; case APFloat::cmpGreaterThan: return CmpResult::Greater; case APFloat::cmpUnordered: return CmpResult::Unordered; } llvm_unreachable("Unrecognised APFloat::cmpResult enum"); }; return Success(GetCmpRes(), E); } if (LHSTy->isPointerType() && RHSTy->isPointerType()) { LValue LHSValue, RHSValue; bool LHSOK = EvaluatePointer(E->getLHS(), LHSValue, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK) return false; // Reject differing bases from the normal codepath; we special-case // comparisons to null. if (!HasSameBase(LHSValue, RHSValue)) { // Inequalities and subtractions between unrelated pointers have // unspecified or undefined behavior. if (!IsEquality) { Info.FFDiag(E, diag::note_constexpr_pointer_comparison_unspecified); return false; } // A constant address may compare equal to the address of a symbol. // The one exception is that address of an object cannot compare equal // to a null pointer constant. if ((!LHSValue.Base && !LHSValue.Offset.isZero()) || (!RHSValue.Base && !RHSValue.Offset.isZero())) return Error(E); // It's implementation-defined whether distinct literals will have // distinct addresses. In clang, the result of such a comparison is // unspecified, so it is not a constant expression. However, we do know // that the address of a literal will be non-null. if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) && LHSValue.Base && RHSValue.Base) return Error(E); // We can't tell whether weak symbols will end up pointing to the same // object. if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue)) return Error(E); // We can't compare the address of the start of one object with the // past-the-end address of another object, per C++ DR1652. 
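// Editorial note (illustrative, not part of upstream): given unrelated objects
//   constexpr int a = 0, b = 0;
// an equality such as (&a + 1) == &b is rejected by the check below, since
// whether the one-past-the-end pointer aliases &b depends on object layout.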
if ((LHSValue.Base && LHSValue.Offset.isZero() && isOnePastTheEndOfCompleteObject(Info.Ctx, RHSValue)) || (RHSValue.Base && RHSValue.Offset.isZero() && isOnePastTheEndOfCompleteObject(Info.Ctx, LHSValue))) return Error(E); // We can't tell whether an object is at the same address as another // zero sized object. if ((RHSValue.Base && isZeroSized(LHSValue)) || (LHSValue.Base && isZeroSized(RHSValue))) return Error(E); return Success(CmpResult::Unequal, E); } const CharUnits &LHSOffset = LHSValue.getLValueOffset(); const CharUnits &RHSOffset = RHSValue.getLValueOffset(); SubobjectDesignator &LHSDesignator = LHSValue.getLValueDesignator(); SubobjectDesignator &RHSDesignator = RHSValue.getLValueDesignator(); // C++11 [expr.rel]p3: // Pointers to void (after pointer conversions) can be compared, with a // result defined as follows: If both pointers represent the same // address or are both the null pointer value, the result is true if the // operator is <= or >= and false otherwise; otherwise the result is // unspecified. // We interpret this as applying to pointers to *cv* void. if (LHSTy->isVoidPointerType() && LHSOffset != RHSOffset && IsRelational) Info.CCEDiag(E, diag::note_constexpr_void_comparison); // C++11 [expr.rel]p2: // - If two pointers point to non-static data members of the same object, // or to subobjects or array elements fo such members, recursively, the // pointer to the later declared member compares greater provided the // two members have the same access control and provided their class is // not a union. // [...] // - Otherwise pointer comparisons are unspecified. if (!LHSDesignator.Invalid && !RHSDesignator.Invalid && IsRelational) { bool WasArrayIndex; unsigned Mismatch = FindDesignatorMismatch( getType(LHSValue.Base), LHSDesignator, RHSDesignator, WasArrayIndex); // At the point where the designators diverge, the comparison has a // specified value if: // - we are comparing array indices // - we are comparing fields of a union, or fields with the same access // Otherwise, the result is unspecified and thus the comparison is not a // constant expression. if (!WasArrayIndex && Mismatch < LHSDesignator.Entries.size() && Mismatch < RHSDesignator.Entries.size()) { const FieldDecl *LF = getAsField(LHSDesignator.Entries[Mismatch]); const FieldDecl *RF = getAsField(RHSDesignator.Entries[Mismatch]); if (!LF && !RF) Info.CCEDiag(E, diag::note_constexpr_pointer_comparison_base_classes); else if (!LF) Info.CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field) << getAsBaseClass(LHSDesignator.Entries[Mismatch]) << RF->getParent() << RF; else if (!RF) Info.CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field) << getAsBaseClass(RHSDesignator.Entries[Mismatch]) << LF->getParent() << LF; else if (!LF->getParent()->isUnion() && LF->getAccess() != RF->getAccess()) Info.CCEDiag(E, diag::note_constexpr_pointer_comparison_differing_access) << LF << LF->getAccess() << RF << RF->getAccess() << LF->getParent(); } } // The comparison here must be unsigned, and performed with the same // width as the pointer. unsigned PtrSize = Info.Ctx.getTypeSize(LHSTy); uint64_t CompareLHS = LHSOffset.getQuantity(); uint64_t CompareRHS = RHSOffset.getQuantity(); assert(PtrSize <= 64 && "Unexpected pointer width"); uint64_t Mask = ~0ULL >> (64 - PtrSize); CompareLHS &= Mask; CompareRHS &= Mask; // If there is a base and this is a relational operator, we can only // compare pointers within the object in question; otherwise, the result // depends on where the object is located in memory. 
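// Editorial note (illustrative, not part of upstream): relational comparisons
// fold only for in-bounds offsets within the same complete object, e.g.
//   constexpr int arr[4] = {};
//   static_assert(&arr[1] < &arr[3], "");   // same base, both in bounds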
if (!LHSValue.Base.isNull() && IsRelational) { QualType BaseTy = getType(LHSValue.Base); if (BaseTy->isIncompleteType()) return Error(E); CharUnits Size = Info.Ctx.getTypeSizeInChars(BaseTy); uint64_t OffsetLimit = Size.getQuantity(); if (CompareLHS > OffsetLimit || CompareRHS > OffsetLimit) return Error(E); } if (CompareLHS < CompareRHS) return Success(CmpResult::Less, E); if (CompareLHS > CompareRHS) return Success(CmpResult::Greater, E); return Success(CmpResult::Equal, E); } if (LHSTy->isMemberPointerType()) { assert(IsEquality && "unexpected member pointer operation"); assert(RHSTy->isMemberPointerType() && "invalid comparison"); MemberPtr LHSValue, RHSValue; bool LHSOK = EvaluateMemberPointer(E->getLHS(), LHSValue, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateMemberPointer(E->getRHS(), RHSValue, Info) || !LHSOK) return false; // C++11 [expr.eq]p2: // If both operands are null, they compare equal. Otherwise if only one is // null, they compare unequal. if (!LHSValue.getDecl() || !RHSValue.getDecl()) { bool Equal = !LHSValue.getDecl() && !RHSValue.getDecl(); return Success(Equal ? CmpResult::Equal : CmpResult::Unequal, E); } // Otherwise if either is a pointer to a virtual member function, the // result is unspecified. if (const CXXMethodDecl *MD = dyn_cast(LHSValue.getDecl())) if (MD->isVirtual()) Info.CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD; if (const CXXMethodDecl *MD = dyn_cast(RHSValue.getDecl())) if (MD->isVirtual()) Info.CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD; // Otherwise they compare equal if and only if they would refer to the // same member of the same most derived object or the same subobject if // they were dereferenced with a hypothetical object of the associated // class type. bool Equal = LHSValue == RHSValue; return Success(Equal ? CmpResult::Equal : CmpResult::Unequal, E); } if (LHSTy->isNullPtrType()) { assert(E->isComparisonOp() && "unexpected nullptr operation"); assert(RHSTy->isNullPtrType() && "missing pointer conversion"); // C++11 [expr.rel]p4, [expr.eq]p3: If two operands of type std::nullptr_t // are compared, the result is true of the operator is <=, >= or ==, and // false otherwise. return Success(CmpResult::Equal, E); } return DoAfter(); } bool RecordExprEvaluator::VisitBinCmp(const BinaryOperator *E) { if (!CheckLiteralType(Info, E)) return false; auto OnSuccess = [&](CmpResult CR, const BinaryOperator *E) { ComparisonCategoryResult CCR; switch (CR) { case CmpResult::Unequal: llvm_unreachable("should never produce Unequal for three-way comparison"); case CmpResult::Less: CCR = ComparisonCategoryResult::Less; break; case CmpResult::Equal: CCR = ComparisonCategoryResult::Equal; break; case CmpResult::Greater: CCR = ComparisonCategoryResult::Greater; break; case CmpResult::Unordered: CCR = ComparisonCategoryResult::Unordered; break; } // Evaluation succeeded. Lookup the information for the comparison category // type and fetch the VarDecl for the result. const ComparisonCategoryInfo &CmpInfo = Info.Ctx.CompCategories.getInfoForType(E->getType()); const VarDecl *VD = CmpInfo.getValueInfo(CmpInfo.makeWeakResult(CCR))->VD; // Check and evaluate the result as a constant expression. 
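// Editorial note (illustrative, not part of upstream): for a built-in
// three-way comparison such as
//   constexpr auto r = (1 <=> 2);   // std::strong_ordering::less
// the VarDecl found above is the library's 'less' constant; its stored value
// is loaded below and becomes the result of the <=> expression.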
LValue LV; LV.set(VD); if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) return false; return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result); }; return EvaluateComparisonBinaryOperator(Info, E, OnSuccess, [&]() { return ExprEvaluatorBaseTy::VisitBinCmp(E); }); } bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { // We don't call noteFailure immediately because the assignment happens after // we evaluate LHS and RHS. if (!Info.keepEvaluatingAfterFailure() && E->isAssignmentOp()) return Error(E); DelayedNoteFailureRAII MaybeNoteFailureLater(Info, E->isAssignmentOp()); if (DataRecursiveIntBinOpEvaluator::shouldEnqueue(E)) return DataRecursiveIntBinOpEvaluator(*this, Result).Traverse(E); assert((!E->getLHS()->getType()->isIntegralOrEnumerationType() || !E->getRHS()->getType()->isIntegralOrEnumerationType()) && "DataRecursiveIntBinOpEvaluator should have handled integral types"); if (E->isComparisonOp()) { // Evaluate builtin binary comparisons by evaluating them as three-way // comparisons and then translating the result. auto OnSuccess = [&](CmpResult CR, const BinaryOperator *E) { assert((CR != CmpResult::Unequal || E->isEqualityOp()) && "should only produce Unequal for equality comparisons"); bool IsEqual = CR == CmpResult::Equal, IsLess = CR == CmpResult::Less, IsGreater = CR == CmpResult::Greater; auto Op = E->getOpcode(); switch (Op) { default: llvm_unreachable("unsupported binary operator"); case BO_EQ: case BO_NE: return Success(IsEqual == (Op == BO_EQ), E); case BO_LT: return Success(IsLess, E); case BO_GT: return Success(IsGreater, E); case BO_LE: return Success(IsEqual || IsLess, E); case BO_GE: return Success(IsEqual || IsGreater, E); } }; return EvaluateComparisonBinaryOperator(Info, E, OnSuccess, [&]() { return ExprEvaluatorBaseTy::VisitBinaryOperator(E); }); } QualType LHSTy = E->getLHS()->getType(); QualType RHSTy = E->getRHS()->getType(); if (LHSTy->isPointerType() && RHSTy->isPointerType() && E->getOpcode() == BO_Sub) { LValue LHSValue, RHSValue; bool LHSOK = EvaluatePointer(E->getLHS(), LHSValue, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK) return false; // Reject differing bases from the normal codepath; we special-case // comparisons to null. if (!HasSameBase(LHSValue, RHSValue)) { // Handle &&A - &&B. if (!LHSValue.Offset.isZero() || !RHSValue.Offset.isZero()) return Error(E); const Expr *LHSExpr = LHSValue.Base.dyn_cast(); const Expr *RHSExpr = RHSValue.Base.dyn_cast(); if (!LHSExpr || !RHSExpr) return Error(E); const AddrLabelExpr *LHSAddrExpr = dyn_cast(LHSExpr); const AddrLabelExpr *RHSAddrExpr = dyn_cast(RHSExpr); if (!LHSAddrExpr || !RHSAddrExpr) return Error(E); // Make sure both labels come from the same function. if (LHSAddrExpr->getLabel()->getDeclContext() != RHSAddrExpr->getLabel()->getDeclContext()) return Error(E); return Success(APValue(LHSAddrExpr, RHSAddrExpr), E); } const CharUnits &LHSOffset = LHSValue.getLValueOffset(); const CharUnits &RHSOffset = RHSValue.getLValueOffset(); SubobjectDesignator &LHSDesignator = LHSValue.getLValueDesignator(); SubobjectDesignator &RHSDesignator = RHSValue.getLValueDesignator(); // C++11 [expr.add]p6: // Unless both pointers point to elements of the same array object, or // one past the last element of the array object, the behavior is // undefined. 
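// Editorial note (illustrative, not part of upstream): pointer subtraction
// folds within a single array object, e.g.
//   constexpr int arr[8] = {};
//   constexpr auto d = &arr[5] - &arr[2];   // d == 3
// while a difference of pointers into unrelated arrays is diagnosed below.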
if (!LHSDesignator.Invalid && !RHSDesignator.Invalid && !AreElementsOfSameArray(getType(LHSValue.Base), LHSDesignator, RHSDesignator)) Info.CCEDiag(E, diag::note_constexpr_pointer_subtraction_not_same_array); QualType Type = E->getLHS()->getType(); QualType ElementType = Type->castAs()->getPointeeType(); CharUnits ElementSize; if (!HandleSizeof(Info, E->getExprLoc(), ElementType, ElementSize)) return false; // As an extension, a type may have zero size (empty struct or union in // C, array of zero length). Pointer subtraction in such cases has // undefined behavior, so is not constant. if (ElementSize.isZero()) { Info.FFDiag(E, diag::note_constexpr_pointer_subtraction_zero_size) << ElementType; return false; } // FIXME: LLVM and GCC both compute LHSOffset - RHSOffset at runtime, // and produce incorrect results when it overflows. Such behavior // appears to be non-conforming, but is common, so perhaps we should // assume the standard intended for such cases to be undefined behavior // and check for them. // Compute (LHSOffset - RHSOffset) / Size carefully, checking for // overflow in the final conversion to ptrdiff_t. APSInt LHS(llvm::APInt(65, (int64_t)LHSOffset.getQuantity(), true), false); APSInt RHS(llvm::APInt(65, (int64_t)RHSOffset.getQuantity(), true), false); APSInt ElemSize(llvm::APInt(65, (int64_t)ElementSize.getQuantity(), true), false); APSInt TrueResult = (LHS - RHS) / ElemSize; APSInt Result = TrueResult.trunc(Info.Ctx.getIntWidth(E->getType())); if (Result.extend(65) != TrueResult && !HandleOverflow(Info, E, TrueResult, E->getType())) return false; return Success(Result, E); } return ExprEvaluatorBaseTy::VisitBinaryOperator(E); } /// VisitUnaryExprOrTypeTraitExpr - Evaluate a sizeof, alignof or vec_step with /// a result as the expression's type. bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr( const UnaryExprOrTypeTraitExpr *E) { switch(E->getKind()) { case UETT_PreferredAlignOf: case UETT_AlignOf: { if (E->isArgumentType()) return Success(GetAlignOfType(Info, E->getArgumentType(), E->getKind()), E); else return Success(GetAlignOfExpr(Info, E->getArgumentExpr(), E->getKind()), E); } case UETT_VecStep: { QualType Ty = E->getTypeOfArgument(); if (Ty->isVectorType()) { unsigned n = Ty->castAs()->getNumElements(); // The vec_step built-in functions that take a 3-component // vector return 4. (OpenCL 1.1 spec 6.11.12) if (n == 3) n = 4; return Success(n, E); } else return Success(1, E); } case UETT_SizeOf: { QualType SrcTy = E->getTypeOfArgument(); // C++ [expr.sizeof]p2: "When applied to a reference or a reference type, // the result is the size of the referenced type." 
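// Editorial note (illustrative, not part of upstream): e.g.
//   static_assert(sizeof(int&) == sizeof(int), "");
// the reference is stripped below so the referenced type's size is used.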
if (const ReferenceType *Ref = SrcTy->getAs()) SrcTy = Ref->getPointeeType(); CharUnits Sizeof; if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof)) return false; return Success(Sizeof, E); } case UETT_OpenMPRequiredSimdAlign: assert(E->isArgumentType()); return Success( Info.Ctx.toCharUnitsFromBits( Info.Ctx.getOpenMPDefaultSimdAlign(E->getArgumentType())) .getQuantity(), E); } llvm_unreachable("unknown expr/type trait"); } bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits Result; unsigned n = OOE->getNumComponents(); if (n == 0) return Error(OOE); QualType CurrentType = OOE->getTypeSourceInfo()->getType(); for (unsigned i = 0; i != n; ++i) { OffsetOfNode ON = OOE->getComponent(i); switch (ON.getKind()) { case OffsetOfNode::Array: { const Expr *Idx = OOE->getIndexExpr(ON.getArrayExprIndex()); APSInt IdxResult; if (!EvaluateInteger(Idx, IdxResult, Info)) return false; const ArrayType *AT = Info.Ctx.getAsArrayType(CurrentType); if (!AT) return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); Result += IdxResult.getSExtValue() * ElementSize; break; } case OffsetOfNode::Field: { FieldDecl *MemberDecl = ON.getField(); const RecordType *RT = CurrentType->getAs(); if (!RT) return Error(OOE); RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD); unsigned i = MemberDecl->getFieldIndex(); assert(i < RL.getFieldCount() && "offsetof field in wrong type"); Result += Info.Ctx.toCharUnitsFromBits(RL.getFieldOffset(i)); CurrentType = MemberDecl->getType().getNonReferenceType(); break; } case OffsetOfNode::Identifier: llvm_unreachable("dependent __builtin_offsetof"); case OffsetOfNode::Base: { CXXBaseSpecifier *BaseSpec = ON.getBase(); if (BaseSpec->isVirtual()) return Error(OOE); // Find the layout of the class whose base we are looking into. const RecordType *RT = CurrentType->getAs(); if (!RT) return Error(OOE); RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD); // Find the base class itself. CurrentType = BaseSpec->getType(); const RecordType *BaseRT = CurrentType->getAs(); if (!BaseRT) return Error(OOE); // Add the offset to the base. Result += RL.getBaseClassOffset(cast(BaseRT->getDecl())); break; } } } return Success(Result, OOE); } bool IntExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: // Address, indirect, pre/post inc/dec, etc are not valid constant exprs. // See C99 6.6p3. return Error(E); case UO_Extension: // FIXME: Should extension allow i-c-e extension expressions in its scope? // If so, we could clear the diagnostic ID. return Visit(E->getSubExpr()); case UO_Plus: // The result is just the value. 
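// Editorial note (illustrative, not part of upstream): UO_Plus just forwards
// the operand; the UO_Minus case that follows rejects signed overflow, e.g.
//   constexpr int bad = -(-2147483647 - 1);   // negating INT_MIN (32-bit int),
//                                             // not a constant expression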
return Visit(E->getSubExpr()); case UO_Minus: { if (!Visit(E->getSubExpr())) return false; if (!Result.isInt()) return Error(E); const APSInt &Value = Result.getInt(); if (Value.isSigned() && Value.isMinSignedValue() && E->canOverflow() && !HandleOverflow(Info, E, -Value.extend(Value.getBitWidth() + 1), E->getType())) return false; return Success(-Value, E); } case UO_Not: { if (!Visit(E->getSubExpr())) return false; if (!Result.isInt()) return Error(E); return Success(~Result.getInt(), E); } case UO_LNot: { bool bres; if (!EvaluateAsBooleanCondition(E->getSubExpr(), bres, Info)) return false; return Success(!bres, E); } } } /// HandleCast - This is used to evaluate implicit or explicit casts where the /// result type is integer. bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr *SubExpr = E->getSubExpr(); QualType DestType = E->getType(); QualType SrcType = SubExpr->getType(); switch (E->getCastKind()) { case CK_BaseToDerived: case CK_DerivedToBase: case CK_UncheckedDerivedToBase: case CK_Dynamic: case CK_ToUnion: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToPointer: case CK_NullToMemberPointer: case CK_BaseToDerivedMemberPointer: case CK_DerivedToBaseMemberPointer: case CK_ReinterpretMemberPointer: case CK_ConstructorConversion: case CK_IntegralToPointer: case CK_ToVoid: case CK_VectorSplat: case CK_IntegralToFloating: case CK_FloatingCast: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_ObjCObjectLValueCast: case CK_FloatingRealToComplex: case CK_FloatingComplexToReal: case CK_FloatingComplexCast: case CK_FloatingComplexToIntegralComplex: case CK_IntegralRealToComplex: case CK_IntegralComplexCast: case CK_IntegralComplexToFloatingComplex: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLOpaqueType: case CK_NonAtomicToAtomic: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_IntegralToFixedPoint: llvm_unreachable("invalid cast kind for integral value"); case CK_BitCast: case CK_Dependent: case CK_LValueBitCast: case CK_ARCProduceObject: case CK_ARCConsumeObject: case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: return Error(E); case CK_UserDefinedConversion: case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NoOp: case CK_LValueToRValueBitCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: case CK_PointerToBoolean: case CK_IntegralToBoolean: case CK_FloatingToBoolean: case CK_BooleanToSignedIntegral: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToBoolean: { bool BoolResult; if (!EvaluateAsBooleanCondition(SubExpr, BoolResult, Info)) return false; uint64_t IntResult = BoolResult; if (BoolResult && E->getCastKind() == CK_BooleanToSignedIntegral) IntResult = (uint64_t)-1; return Success(IntResult, E); } case CK_FixedPointToIntegral: { APFixedPoint Src(Info.Ctx.getFixedPointSemantics(SrcType)); if (!EvaluateFixedPoint(SubExpr, Src, Info)) return false; bool Overflowed; llvm::APSInt Result = Src.convertToInt( Info.Ctx.getIntWidth(DestType), DestType->isSignedIntegerOrEnumerationType(), &Overflowed); if (Overflowed && !HandleOverflow(Info, E, Result, DestType)) return false; return Success(Result, E); } case CK_FixedPointToBoolean: { // Unsigned padding does not affect this. 
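// Editorial note (illustrative, not part of upstream): fixed-point to bool is
// a test against zero, e.g. with Clang's fixed-point extension enabled,
// (_Bool)0.0r evaluates to 0 and (_Bool)0.5r evaluates to 1.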
APValue Val; if (!Evaluate(Val, Info, SubExpr)) return false; return Success(Val.getFixedPoint().getBoolValue(), E); } case CK_IntegralCast: { if (!Visit(SubExpr)) return false; if (!Result.isInt()) { // Allow casts of address-of-label differences if they are no-ops // or narrowing. (The narrowing case isn't actually guaranteed to // be constant-evaluatable except in some narrow cases which are hard // to detect here. We let it through on the assumption the user knows // what they are doing.) if (Result.isAddrLabelDiff()) return Info.Ctx.getTypeSize(DestType) <= Info.Ctx.getTypeSize(SrcType); // Only allow casts of lvalues if they are lossless. return Info.Ctx.getTypeSize(DestType) == Info.Ctx.getTypeSize(SrcType); } return Success(HandleIntToIntCast(Info, E, DestType, SrcType, Result.getInt()), E); } case CK_PointerToIntegral: { CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; LValue LV; if (!EvaluatePointer(SubExpr, LV, Info)) return false; if (LV.getLValueBase()) { // Only allow based lvalue casts if they are lossless. // FIXME: Allow a larger integer size than the pointer size, and allow // narrowing back down to pointer width in subsequent integral casts. // FIXME: Check integer type's active bits, not its type size. if (Info.Ctx.getTypeSize(DestType) != Info.Ctx.getTypeSize(SrcType)) return Error(E); LV.Designator.setInvalid(); LV.moveInto(Result); return true; } APSInt AsInt; APValue V; LV.moveInto(V); if (!V.toIntegralConstant(AsInt, SrcType, Info.Ctx)) llvm_unreachable("Can't cast this!"); return Success(HandleIntToIntCast(Info, E, DestType, SrcType, AsInt), E); } case CK_IntegralComplexToReal: { ComplexValue C; if (!EvaluateComplex(SubExpr, C, Info)) return false; return Success(C.getComplexIntReal(), E); } case CK_FloatingToIntegral: { APFloat F(0.0); if (!EvaluateFloat(SubExpr, F, Info)) return false; APSInt Value; if (!HandleFloatToIntCast(Info, E, SrcType, F, DestType, Value)) return false; return Success(Value, E); } } llvm_unreachable("unknown cast resulting in integral value"); } bool IntExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue LV; if (!EvaluateComplex(E->getSubExpr(), LV, Info)) return false; if (!LV.isComplexInt()) return Error(E); return Success(LV.getComplexIntReal(), E); } return Visit(E->getSubExpr()); } bool IntExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isComplexIntegerType()) { ComplexValue LV; if (!EvaluateComplex(E->getSubExpr(), LV, Info)) return false; if (!LV.isComplexInt()) return Error(E); return Success(LV.getComplexIntImag(), E); } VisitIgnoredValue(E->getSubExpr()); return Success(0, E); } bool IntExprEvaluator::VisitSizeOfPackExpr(const SizeOfPackExpr *E) { return Success(E->getPackLength(), E); } bool IntExprEvaluator::VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) { return Success(E->getValue(), E); } bool IntExprEvaluator::VisitConceptSpecializationExpr( const ConceptSpecializationExpr *E) { return Success(E->isSatisfied(), E); } bool IntExprEvaluator::VisitRequiresExpr(const RequiresExpr *E) { return Success(E->isSatisfied(), E); } bool FixedPointExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: // Invalid unary operators return Error(E); case UO_Plus: // The result is just the value. 
return Visit(E->getSubExpr()); case UO_Minus: { if (!Visit(E->getSubExpr())) return false; if (!Result.isFixedPoint()) return Error(E); bool Overflowed; APFixedPoint Negated = Result.getFixedPoint().negate(&Overflowed); if (Overflowed && !HandleOverflow(Info, E, Negated, E->getType())) return false; return Success(Negated, E); } case UO_LNot: { bool bres; if (!EvaluateAsBooleanCondition(E->getSubExpr(), bres, Info)) return false; return Success(!bres, E); } } } bool FixedPointExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr *SubExpr = E->getSubExpr(); QualType DestType = E->getType(); assert(DestType->isFixedPointType() && "Expected destination type to be a fixed point type"); auto DestFXSema = Info.Ctx.getFixedPointSemantics(DestType); switch (E->getCastKind()) { case CK_FixedPointCast: { APFixedPoint Src(Info.Ctx.getFixedPointSemantics(SubExpr->getType())); if (!EvaluateFixedPoint(SubExpr, Src, Info)) return false; bool Overflowed; APFixedPoint Result = Src.convert(DestFXSema, &Overflowed); if (Overflowed) { if (Info.checkingForUndefinedBehavior()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_fixedpoint_constant_overflow) << Result.toString() << E->getType(); else if (!HandleOverflow(Info, E, Result, E->getType())) return false; } return Success(Result, E); } case CK_IntegralToFixedPoint: { APSInt Src; if (!EvaluateInteger(SubExpr, Src, Info)) return false; bool Overflowed; APFixedPoint IntResult = APFixedPoint::getFromIntValue( Src, Info.Ctx.getFixedPointSemantics(DestType), &Overflowed); if (Overflowed) { if (Info.checkingForUndefinedBehavior()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_fixedpoint_constant_overflow) << IntResult.toString() << E->getType(); else if (!HandleOverflow(Info, E, IntResult, E->getType())) return false; } return Success(IntResult, E); } case CK_NoOp: case CK_LValueToRValue: return ExprEvaluatorBaseTy::VisitCastExpr(E); default: return Error(E); } } bool FixedPointExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); const Expr *LHS = E->getLHS(); const Expr *RHS = E->getRHS(); FixedPointSemantics ResultFXSema = Info.Ctx.getFixedPointSemantics(E->getType()); APFixedPoint LHSFX(Info.Ctx.getFixedPointSemantics(LHS->getType())); if (!EvaluateFixedPointOrInteger(LHS, LHSFX, Info)) return false; APFixedPoint RHSFX(Info.Ctx.getFixedPointSemantics(RHS->getType())); if (!EvaluateFixedPointOrInteger(RHS, RHSFX, Info)) return false; bool OpOverflow = false, ConversionOverflow = false; APFixedPoint Result(LHSFX.getSemantics()); switch (E->getOpcode()) { case BO_Add: { Result = LHSFX.add(RHSFX, &OpOverflow) .convert(ResultFXSema, &ConversionOverflow); break; } case BO_Sub: { Result = LHSFX.sub(RHSFX, &OpOverflow) .convert(ResultFXSema, &ConversionOverflow); break; } case BO_Mul: { Result = LHSFX.mul(RHSFX, &OpOverflow) .convert(ResultFXSema, &ConversionOverflow); break; } case BO_Div: { if (RHSFX.getValue() == 0) { Info.FFDiag(E, diag::note_expr_divide_by_zero); return false; } Result = LHSFX.div(RHSFX, &OpOverflow) .convert(ResultFXSema, &ConversionOverflow); break; } default: return false; } if (OpOverflow || ConversionOverflow) { if (Info.checkingForUndefinedBehavior()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_fixedpoint_constant_overflow) << Result.toString() << E->getType(); else if (!HandleOverflow(Info, E, Result, E->getType())) return false; } return Success(Result, 
E); } //===----------------------------------------------------------------------===// // Float Evaluation //===----------------------------------------------------------------------===// namespace { class FloatExprEvaluator : public ExprEvaluatorBase { APFloat &Result; public: FloatExprEvaluator(EvalInfo &info, APFloat &result) : ExprEvaluatorBaseTy(info), Result(result) {} bool Success(const APValue &V, const Expr *e) { Result = V.getFloat(); return true; } bool ZeroInitialization(const Expr *E) { Result = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(E->getType())); return true; } bool VisitCallExpr(const CallExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitFloatingLiteral(const FloatingLiteral *E); bool VisitCastExpr(const CastExpr *E); bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); // FIXME: Missing: array subscript of vector, member of vector }; } // end anonymous namespace static bool EvaluateFloat(const Expr* E, APFloat& Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRealFloatingType()); return FloatExprEvaluator(Info, Result).Visit(E); } static bool TryEvaluateBuiltinNaN(const ASTContext &Context, QualType ResultTy, const Expr *Arg, bool SNaN, llvm::APFloat &Result) { const StringLiteral *S = dyn_cast(Arg->IgnoreParenCasts()); if (!S) return false; const llvm::fltSemantics &Sem = Context.getFloatTypeSemantics(ResultTy); llvm::APInt fill; // Treat empty strings as if they were zero. if (S->getString().empty()) fill = llvm::APInt(32, 0); else if (S->getString().getAsInteger(0, fill)) return false; if (Context.getTargetInfo().isNan2008()) { if (SNaN) Result = llvm::APFloat::getSNaN(Sem, false, &fill); else Result = llvm::APFloat::getQNaN(Sem, false, &fill); } else { // Prior to IEEE 754-2008, architectures were allowed to choose whether // the first bit of their significand was set for qNaN or sNaN. MIPS chose // a different encoding to what became a standard in 2008, and for pre- // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as // sNaN. This is now known as "legacy NaN" encoding. if (SNaN) Result = llvm::APFloat::getQNaN(Sem, false, &fill); else Result = llvm::APFloat::getSNaN(Sem, false, &fill); } return true; } bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { default: return ExprEvaluatorBaseTy::VisitCallExpr(E); case Builtin::BI__builtin_huge_val: case Builtin::BI__builtin_huge_valf: case Builtin::BI__builtin_huge_vall: case Builtin::BI__builtin_huge_valf128: case Builtin::BI__builtin_inf: case Builtin::BI__builtin_inff: case Builtin::BI__builtin_infl: case Builtin::BI__builtin_inff128: { const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(E->getType()); Result = llvm::APFloat::getInf(Sem); return true; } case Builtin::BI__builtin_nans: case Builtin::BI__builtin_nansf: case Builtin::BI__builtin_nansl: case Builtin::BI__builtin_nansf128: if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0), true, Result)) return Error(E); return true; case Builtin::BI__builtin_nan: case Builtin::BI__builtin_nanf: case Builtin::BI__builtin_nanl: case Builtin::BI__builtin_nanf128: // If this is __builtin_nan() turn this into a nan, otherwise we // can't constant fold it. 
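// Editorial note (illustrative, not part of upstream): only a constant string
// payload folds, e.g.
//   constexpr double q = __builtin_nan("");   // quiet NaN, folds
// whereas a runtime char* argument cannot be folded and is rejected below.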
if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0), false, Result)) return Error(E); return true; case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: if (!EvaluateFloat(E->getArg(0), Result, Info)) return false; if (Result.isNegative()) Result.changeSign(); return true; // FIXME: Builtin::BI__builtin_powi // FIXME: Builtin::BI__builtin_powif // FIXME: Builtin::BI__builtin_powil case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: { APFloat RHS(0.); if (!EvaluateFloat(E->getArg(0), Result, Info) || !EvaluateFloat(E->getArg(1), RHS, Info)) return false; Result.copySign(RHS); return true; } } } bool FloatExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue CV; if (!EvaluateComplex(E->getSubExpr(), CV, Info)) return false; Result = CV.FloatReal; return true; } return Visit(E->getSubExpr()); } bool FloatExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue CV; if (!EvaluateComplex(E->getSubExpr(), CV, Info)) return false; Result = CV.FloatImag; return true; } VisitIgnoredValue(E->getSubExpr()); const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(E->getType()); Result = llvm::APFloat::getZero(Sem); return true; } bool FloatExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: return Error(E); case UO_Plus: return EvaluateFloat(E->getSubExpr(), Result, Info); case UO_Minus: if (!EvaluateFloat(E->getSubExpr(), Result, Info)) return false; Result.changeSign(); return true; } } bool FloatExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); APFloat RHS(0.0); bool LHSOK = EvaluateFloat(E->getLHS(), Result, Info); if (!LHSOK && !Info.noteFailure()) return false; return EvaluateFloat(E->getRHS(), RHS, Info) && LHSOK && handleFloatFloatBinOp(Info, E, Result, E->getOpcode(), RHS); } bool FloatExprEvaluator::VisitFloatingLiteral(const FloatingLiteral *E) { Result = E->getValue(); return true; } bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr* SubExpr = E->getSubExpr(); switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_IntegralToFloating: { APSInt IntResult; return EvaluateInteger(SubExpr, IntResult, Info) && HandleIntToFloatCast(Info, E, SubExpr->getType(), IntResult, E->getType(), Result); } case CK_FloatingCast: { if (!Visit(SubExpr)) return false; return HandleFloatToFloatCast(Info, E, SubExpr->getType(), E->getType(), Result); } case CK_FloatingComplexToReal: { ComplexValue V; if (!EvaluateComplex(SubExpr, V, Info)) return false; Result = V.getComplexFloatReal(); return true; } } } //===----------------------------------------------------------------------===// // Complex Evaluation (for float and integer) //===----------------------------------------------------------------------===// namespace { class ComplexExprEvaluator : public ExprEvaluatorBase { ComplexValue &Result; public: ComplexExprEvaluator(EvalInfo &info, ComplexValue &Result) : ExprEvaluatorBaseTy(info), Result(Result) {} bool Success(const APValue &V, const Expr *e) { Result.setFrom(V); return true; } bool ZeroInitialization(const Expr *E); 
//===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// bool VisitImaginaryLiteral(const ImaginaryLiteral *E); bool VisitCastExpr(const CastExpr *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitInitListExpr(const InitListExpr *E); }; } // end anonymous namespace static bool EvaluateComplex(const Expr *E, ComplexValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isAnyComplexType()); return ComplexExprEvaluator(Info, Result).Visit(E); } bool ComplexExprEvaluator::ZeroInitialization(const Expr *E) { QualType ElemTy = E->getType()->castAs()->getElementType(); if (ElemTy->isRealFloatingType()) { Result.makeComplexFloat(); APFloat Zero = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)); Result.FloatReal = Zero; Result.FloatImag = Zero; } else { Result.makeComplexInt(); APSInt Zero = Info.Ctx.MakeIntValue(0, ElemTy); Result.IntReal = Zero; Result.IntImag = Zero; } return true; } bool ComplexExprEvaluator::VisitImaginaryLiteral(const ImaginaryLiteral *E) { const Expr* SubExpr = E->getSubExpr(); if (SubExpr->getType()->isRealFloatingType()) { Result.makeComplexFloat(); APFloat &Imag = Result.FloatImag; if (!EvaluateFloat(SubExpr, Imag, Info)) return false; Result.FloatReal = APFloat(Imag.getSemantics()); return true; } else { assert(SubExpr->getType()->isIntegerType() && "Unexpected imaginary literal."); Result.makeComplexInt(); APSInt &Imag = Result.IntImag; if (!EvaluateInteger(SubExpr, Imag, Info)) return false; Result.IntReal = APSInt(Imag.getBitWidth(), !Imag.isSigned()); return true; } } bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { case CK_BitCast: case CK_BaseToDerived: case CK_DerivedToBase: case CK_UncheckedDerivedToBase: case CK_Dynamic: case CK_ToUnion: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToPointer: case CK_NullToMemberPointer: case CK_BaseToDerivedMemberPointer: case CK_DerivedToBaseMemberPointer: case CK_MemberPointerToBoolean: case CK_ReinterpretMemberPointer: case CK_ConstructorConversion: case CK_IntegralToPointer: case CK_PointerToIntegral: case CK_PointerToBoolean: case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_ObjCObjectLValueCast: case CK_FloatingComplexToReal: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToReal: case CK_IntegralComplexToBoolean: case CK_ARCProduceObject: case CK_ARCConsumeObject: case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLOpaqueType: case CK_NonAtomicToAtomic: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: case CK_FixedPointToIntegral: case CK_IntegralToFixedPoint: llvm_unreachable("invalid cast kind for complex value"); case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NoOp: case CK_LValueToRValueBitCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_Dependent: case CK_LValueBitCast: case CK_UserDefinedConversion: return Error(E); case CK_FloatingRealToComplex: { APFloat &Real = 
Result.FloatReal; if (!EvaluateFloat(E->getSubExpr(), Real, Info)) return false; Result.makeComplexFloat(); Result.FloatImag = APFloat(Real.getSemantics()); return true; } case CK_FloatingComplexCast: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->castAs()->getElementType(); QualType From = E->getSubExpr()->getType()->castAs()->getElementType(); return HandleFloatToFloatCast(Info, E, From, To, Result.FloatReal) && HandleFloatToFloatCast(Info, E, From, To, Result.FloatImag); } case CK_FloatingComplexToIntegralComplex: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->castAs()->getElementType(); QualType From = E->getSubExpr()->getType()->castAs()->getElementType(); Result.makeComplexInt(); return HandleFloatToIntCast(Info, E, From, Result.FloatReal, To, Result.IntReal) && HandleFloatToIntCast(Info, E, From, Result.FloatImag, To, Result.IntImag); } case CK_IntegralRealToComplex: { APSInt &Real = Result.IntReal; if (!EvaluateInteger(E->getSubExpr(), Real, Info)) return false; Result.makeComplexInt(); Result.IntImag = APSInt(Real.getBitWidth(), !Real.isSigned()); return true; } case CK_IntegralComplexCast: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->castAs()->getElementType(); QualType From = E->getSubExpr()->getType()->castAs()->getElementType(); Result.IntReal = HandleIntToIntCast(Info, E, To, From, Result.IntReal); Result.IntImag = HandleIntToIntCast(Info, E, To, From, Result.IntImag); return true; } case CK_IntegralComplexToFloatingComplex: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->castAs()->getElementType(); QualType From = E->getSubExpr()->getType()->castAs()->getElementType(); Result.makeComplexFloat(); return HandleIntToFloatCast(Info, E, From, Result.IntReal, To, Result.FloatReal) && HandleIntToFloatCast(Info, E, From, Result.IntImag, To, Result.FloatImag); } } llvm_unreachable("unknown cast resulting in complex value"); } bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); // Track whether the LHS or RHS is real at the type system level. When this is // the case we can simplify our evaluation strategy. 
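  // Illustrative example (hypothetical snippet using the GNU imaginary-literal
  // extension): in
  //
  //   _Complex double z = 2.0 + (3.0 + 4.0i);
  //
  // the left-hand operand of '+' keeps its real floating type under the usual
  // arithmetic conversions, so LHSReal is set below and the imaginary part of
  // the result can be taken directly from the RHS instead of running the
  // general complex-complex path.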
bool LHSReal = false, RHSReal = false; bool LHSOK; if (E->getLHS()->getType()->isRealFloatingType()) { LHSReal = true; APFloat &Real = Result.FloatReal; LHSOK = EvaluateFloat(E->getLHS(), Real, Info); if (LHSOK) { Result.makeComplexFloat(); Result.FloatImag = APFloat(Real.getSemantics()); } } else { LHSOK = Visit(E->getLHS()); } if (!LHSOK && !Info.noteFailure()) return false; ComplexValue RHS; if (E->getRHS()->getType()->isRealFloatingType()) { RHSReal = true; APFloat &Real = RHS.FloatReal; if (!EvaluateFloat(E->getRHS(), Real, Info) || !LHSOK) return false; RHS.makeComplexFloat(); RHS.FloatImag = APFloat(Real.getSemantics()); } else if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK) return false; assert(!(LHSReal && RHSReal) && "Cannot have both operands of a complex operation be real."); switch (E->getOpcode()) { default: return Error(E); case BO_Add: if (Result.isComplexFloat()) { Result.getComplexFloatReal().add(RHS.getComplexFloatReal(), APFloat::rmNearestTiesToEven); if (LHSReal) Result.getComplexFloatImag() = RHS.getComplexFloatImag(); else if (!RHSReal) Result.getComplexFloatImag().add(RHS.getComplexFloatImag(), APFloat::rmNearestTiesToEven); } else { Result.getComplexIntReal() += RHS.getComplexIntReal(); Result.getComplexIntImag() += RHS.getComplexIntImag(); } break; case BO_Sub: if (Result.isComplexFloat()) { Result.getComplexFloatReal().subtract(RHS.getComplexFloatReal(), APFloat::rmNearestTiesToEven); if (LHSReal) { Result.getComplexFloatImag() = RHS.getComplexFloatImag(); Result.getComplexFloatImag().changeSign(); } else if (!RHSReal) { Result.getComplexFloatImag().subtract(RHS.getComplexFloatImag(), APFloat::rmNearestTiesToEven); } } else { Result.getComplexIntReal() -= RHS.getComplexIntReal(); Result.getComplexIntImag() -= RHS.getComplexIntImag(); } break; case BO_Mul: if (Result.isComplexFloat()) { // This is an implementation of complex multiplication according to the // constraints laid out in C11 Annex G. The implementation uses the // following naming scheme: // (a + ib) * (c + id) ComplexValue LHS = Result; APFloat &A = LHS.getComplexFloatReal(); APFloat &B = LHS.getComplexFloatImag(); APFloat &C = RHS.getComplexFloatReal(); APFloat &D = RHS.getComplexFloatImag(); APFloat &ResR = Result.getComplexFloatReal(); APFloat &ResI = Result.getComplexFloatImag(); if (LHSReal) { assert(!RHSReal && "Cannot have two real operands for a complex op!"); ResR = A * C; ResI = A * D; } else if (RHSReal) { ResR = C * A; ResI = C * B; } else { // In the fully general case, we need to handle NaNs and infinities // robustly. APFloat AC = A * C; APFloat BD = B * D; APFloat AD = A * D; APFloat BC = B * C; ResR = AC - BD; ResI = AD + BC; if (ResR.isNaN() && ResI.isNaN()) { bool Recalc = false; if (A.isInfinity() || B.isInfinity()) { A = APFloat::copySign( APFloat(A.getSemantics(), A.isInfinity() ? 1 : 0), A); B = APFloat::copySign( APFloat(B.getSemantics(), B.isInfinity() ? 1 : 0), B); if (C.isNaN()) C = APFloat::copySign(APFloat(C.getSemantics()), C); if (D.isNaN()) D = APFloat::copySign(APFloat(D.getSemantics()), D); Recalc = true; } if (C.isInfinity() || D.isInfinity()) { C = APFloat::copySign( APFloat(C.getSemantics(), C.isInfinity() ? 1 : 0), C); D = APFloat::copySign( APFloat(D.getSemantics(), D.isInfinity() ? 
1 : 0), D); if (A.isNaN()) A = APFloat::copySign(APFloat(A.getSemantics()), A); if (B.isNaN()) B = APFloat::copySign(APFloat(B.getSemantics()), B); Recalc = true; } if (!Recalc && (AC.isInfinity() || BD.isInfinity() || AD.isInfinity() || BC.isInfinity())) { if (A.isNaN()) A = APFloat::copySign(APFloat(A.getSemantics()), A); if (B.isNaN()) B = APFloat::copySign(APFloat(B.getSemantics()), B); if (C.isNaN()) C = APFloat::copySign(APFloat(C.getSemantics()), C); if (D.isNaN()) D = APFloat::copySign(APFloat(D.getSemantics()), D); Recalc = true; } if (Recalc) { ResR = APFloat::getInf(A.getSemantics()) * (A * C - B * D); ResI = APFloat::getInf(A.getSemantics()) * (A * D + B * C); } } } } else { ComplexValue LHS = Result; Result.getComplexIntReal() = (LHS.getComplexIntReal() * RHS.getComplexIntReal() - LHS.getComplexIntImag() * RHS.getComplexIntImag()); Result.getComplexIntImag() = (LHS.getComplexIntReal() * RHS.getComplexIntImag() + LHS.getComplexIntImag() * RHS.getComplexIntReal()); } break; case BO_Div: if (Result.isComplexFloat()) { // This is an implementation of complex division according to the // constraints laid out in C11 Annex G. The implementation uses the // following naming scheme: // (a + ib) / (c + id) ComplexValue LHS = Result; APFloat &A = LHS.getComplexFloatReal(); APFloat &B = LHS.getComplexFloatImag(); APFloat &C = RHS.getComplexFloatReal(); APFloat &D = RHS.getComplexFloatImag(); APFloat &ResR = Result.getComplexFloatReal(); APFloat &ResI = Result.getComplexFloatImag(); if (RHSReal) { ResR = A / C; ResI = B / C; } else { if (LHSReal) { // No real optimizations we can do here, stub out with zero. B = APFloat::getZero(A.getSemantics()); } int DenomLogB = 0; APFloat MaxCD = maxnum(abs(C), abs(D)); if (MaxCD.isFinite()) { DenomLogB = ilogb(MaxCD); C = scalbn(C, -DenomLogB, APFloat::rmNearestTiesToEven); D = scalbn(D, -DenomLogB, APFloat::rmNearestTiesToEven); } APFloat Denom = C * C + D * D; ResR = scalbn((A * C + B * D) / Denom, -DenomLogB, APFloat::rmNearestTiesToEven); ResI = scalbn((B * C - A * D) / Denom, -DenomLogB, APFloat::rmNearestTiesToEven); if (ResR.isNaN() && ResI.isNaN()) { if (Denom.isPosZero() && (!A.isNaN() || !B.isNaN())) { ResR = APFloat::getInf(ResR.getSemantics(), C.isNegative()) * A; ResI = APFloat::getInf(ResR.getSemantics(), C.isNegative()) * B; } else if ((A.isInfinity() || B.isInfinity()) && C.isFinite() && D.isFinite()) { A = APFloat::copySign( APFloat(A.getSemantics(), A.isInfinity() ? 1 : 0), A); B = APFloat::copySign( APFloat(B.getSemantics(), B.isInfinity() ? 1 : 0), B); ResR = APFloat::getInf(ResR.getSemantics()) * (A * C + B * D); ResI = APFloat::getInf(ResI.getSemantics()) * (B * C - A * D); } else if (MaxCD.isInfinity() && A.isFinite() && B.isFinite()) { C = APFloat::copySign( APFloat(C.getSemantics(), C.isInfinity() ? 1 : 0), C); D = APFloat::copySign( APFloat(D.getSemantics(), D.isInfinity() ? 
1 : 0), D); ResR = APFloat::getZero(ResR.getSemantics()) * (A * C + B * D); ResI = APFloat::getZero(ResI.getSemantics()) * (B * C - A * D); } } } } else { if (RHS.getComplexIntReal() == 0 && RHS.getComplexIntImag() == 0) return Error(E, diag::note_expr_divide_by_zero); ComplexValue LHS = Result; APSInt Den = RHS.getComplexIntReal() * RHS.getComplexIntReal() + RHS.getComplexIntImag() * RHS.getComplexIntImag(); Result.getComplexIntReal() = (LHS.getComplexIntReal() * RHS.getComplexIntReal() + LHS.getComplexIntImag() * RHS.getComplexIntImag()) / Den; Result.getComplexIntImag() = (LHS.getComplexIntImag() * RHS.getComplexIntReal() - LHS.getComplexIntReal() * RHS.getComplexIntImag()) / Den; } break; } return true; } bool ComplexExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { // Get the operand value into 'Result'. if (!Visit(E->getSubExpr())) return false; switch (E->getOpcode()) { default: return Error(E); case UO_Extension: return true; case UO_Plus: // The result is always just the subexpr. return true; case UO_Minus: if (Result.isComplexFloat()) { Result.getComplexFloatReal().changeSign(); Result.getComplexFloatImag().changeSign(); } else { Result.getComplexIntReal() = -Result.getComplexIntReal(); Result.getComplexIntImag() = -Result.getComplexIntImag(); } return true; case UO_Not: if (Result.isComplexFloat()) Result.getComplexFloatImag().changeSign(); else Result.getComplexIntImag() = -Result.getComplexIntImag(); return true; } } bool ComplexExprEvaluator::VisitInitListExpr(const InitListExpr *E) { if (E->getNumInits() == 2) { if (E->getType()->isComplexType()) { Result.makeComplexFloat(); if (!EvaluateFloat(E->getInit(0), Result.FloatReal, Info)) return false; if (!EvaluateFloat(E->getInit(1), Result.FloatImag, Info)) return false; } else { Result.makeComplexInt(); if (!EvaluateInteger(E->getInit(0), Result.IntReal, Info)) return false; if (!EvaluateInteger(E->getInit(1), Result.IntImag, Info)) return false; } return true; } return ExprEvaluatorBaseTy::VisitInitListExpr(E); } //===----------------------------------------------------------------------===// // Atomic expression evaluation, essentially just handling the NonAtomicToAtomic // implicit conversion. //===----------------------------------------------------------------------===// namespace { class AtomicExprEvaluator : public ExprEvaluatorBase { const LValue *This; APValue &Result; public: AtomicExprEvaluator(EvalInfo &Info, const LValue *This, APValue &Result) : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {} bool Success(const APValue &V, const Expr *E) { Result = V; return true; } bool ZeroInitialization(const Expr *E) { ImplicitValueInitExpr VIE( E->getType()->castAs()->getValueType()); // For atomic-qualified class (and array) types in C++, initialize the // _Atomic-wrapped subobject directly, in-place. return This ? EvaluateInPlace(Result, Info, *This, &VIE) : Evaluate(Result, Info, &VIE); } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_NonAtomicToAtomic: return This ? 
EvaluateInPlace(Result, Info, *This, E->getSubExpr()) : Evaluate(Result, Info, E->getSubExpr()); } } }; } // end anonymous namespace static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isAtomicType()); return AtomicExprEvaluator(Info, This, Result).Visit(E); } //===----------------------------------------------------------------------===// // Void expression evaluation, primarily for a cast to void on the LHS of a // comma operator //===----------------------------------------------------------------------===// namespace { class VoidExprEvaluator : public ExprEvaluatorBase { public: VoidExprEvaluator(EvalInfo &Info) : ExprEvaluatorBaseTy(Info) {} bool Success(const APValue &V, const Expr *e) { return true; } bool ZeroInitialization(const Expr *E) { return true; } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_ToVoid: VisitIgnoredValue(E->getSubExpr()); return true; } } bool VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { case Builtin::BI__assume: case Builtin::BI__builtin_assume: // The argument is not evaluated! return true; case Builtin::BI__builtin_operator_delete: return HandleOperatorDeleteCall(Info, E); default: break; } return ExprEvaluatorBaseTy::VisitCallExpr(E); } bool VisitCXXDeleteExpr(const CXXDeleteExpr *E); }; } // end anonymous namespace bool VoidExprEvaluator::VisitCXXDeleteExpr(const CXXDeleteExpr *E) { // We cannot speculatively evaluate a delete expression. if (Info.SpeculativeEvaluationDepth) return false; FunctionDecl *OperatorDelete = E->getOperatorDelete(); if (!OperatorDelete->isReplaceableGlobalAllocationFunction()) { Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) << isa(OperatorDelete) << OperatorDelete; return false; } const Expr *Arg = E->getArgument(); LValue Pointer; if (!EvaluatePointer(Arg, Pointer, Info)) return false; if (Pointer.Designator.Invalid) return false; // Deleting a null pointer has no effect. if (Pointer.isNullPointer()) { // This is the only case where we need to produce an extension warning: // the only other way we can succeed is if we find a dynamic allocation, // and we will have warned when we allocated it in that case. if (!Info.getLangOpts().CPlusPlus20) Info.CCEDiag(E, diag::note_constexpr_new); return true; } Optional Alloc = CheckDeleteKind( Info, E, Pointer, E->isArrayForm() ? DynAlloc::ArrayNew : DynAlloc::New); if (!Alloc) return false; QualType AllocType = Pointer.Base.getDynamicAllocType(); // For the non-array case, the designator must be empty if the static type // does not have a virtual destructor. if (!E->isArrayForm() && Pointer.Designator.Entries.size() != 0 && !hasVirtualDestructor(Arg->getType()->getPointeeType())) { Info.FFDiag(E, diag::note_constexpr_delete_base_nonvirt_dtor) << Arg->getType()->getPointeeType() << AllocType; return false; } // For a class type with a virtual destructor, the selected operator delete // is the one looked up when building the destructor. 
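  // Illustrative example (hypothetical C++20 snippet): the statically selected
  // ::operator delete is replaceable here, but the deallocation function found
  // when building D's destructor is a class member, so the delete-expression
  // is rejected during constant evaluation:
  //
  //   struct B { virtual ~B() = default; };
  //   struct D : B { void operator delete(void *p) { ::operator delete(p); } };
  //   constexpr bool f() { B *p = new D; delete p; return true; }
  //   // f() is not usable in a constant expression.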
if (!E->isArrayForm() && !E->isGlobalDelete()) { const FunctionDecl *VirtualDelete = getVirtualOperatorDelete(AllocType); if (VirtualDelete && !VirtualDelete->isReplaceableGlobalAllocationFunction()) { Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) << isa(VirtualDelete) << VirtualDelete; return false; } } if (!HandleDestruction(Info, E->getExprLoc(), Pointer.getLValueBase(), (*Alloc)->Value, AllocType)) return false; if (!Info.HeapAllocs.erase(Pointer.Base.dyn_cast())) { // The element was already erased. This means the destructor call also // deleted the object. // FIXME: This probably results in undefined behavior before we get this // far, and should be diagnosed elsewhere first. Info.FFDiag(E, diag::note_constexpr_double_delete); return false; } return true; } static bool EvaluateVoid(const Expr *E, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isVoidType()); return VoidExprEvaluator(Info).Visit(E); } //===----------------------------------------------------------------------===// // Top level Expr::EvaluateAsRValue method. //===----------------------------------------------------------------------===// static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) { // In C, function designators are not lvalues, but we evaluate them as if they // are. QualType T = E->getType(); if (E->isGLValue() || T->isFunctionType()) { LValue LV; if (!EvaluateLValue(E, LV, Info)) return false; LV.moveInto(Result); } else if (T->isVectorType()) { if (!EvaluateVector(E, Result, Info)) return false; } else if (T->isIntegralOrEnumerationType()) { if (!IntExprEvaluator(Info, Result).Visit(E)) return false; } else if (T->hasPointerRepresentation()) { LValue LV; if (!EvaluatePointer(E, LV, Info)) return false; LV.moveInto(Result); } else if (T->isRealFloatingType()) { llvm::APFloat F(0.0); if (!EvaluateFloat(E, F, Info)) return false; Result = APValue(F); } else if (T->isAnyComplexType()) { ComplexValue C; if (!EvaluateComplex(E, C, Info)) return false; C.moveInto(Result); } else if (T->isFixedPointType()) { if (!FixedPointExprEvaluator(Info, Result).Visit(E)) return false; } else if (T->isMemberPointerType()) { MemberPtr P; if (!EvaluateMemberPointer(E, P, Info)) return false; P.moveInto(Result); return true; } else if (T->isArrayType()) { LValue LV; APValue &Value = Info.CurrentCall->createTemporary(E, T, false, LV); if (!EvaluateArray(E, LV, Value, Info)) return false; Result = Value; } else if (T->isRecordType()) { LValue LV; APValue &Value = Info.CurrentCall->createTemporary(E, T, false, LV); if (!EvaluateRecord(E, LV, Value, Info)) return false; Result = Value; } else if (T->isVoidType()) { if (!Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_nonliteral) << E->getType(); if (!EvaluateVoid(E, Info)) return false; } else if (T->isAtomicType()) { QualType Unqual = T.getAtomicUnqualifiedType(); if (Unqual->isArrayType() || Unqual->isRecordType()) { LValue LV; APValue &Value = Info.CurrentCall->createTemporary(E, Unqual, false, LV); if (!EvaluateAtomic(E, &LV, Value, Info)) return false; } else { if (!EvaluateAtomic(E, nullptr, Result, Info)) return false; } } else if (Info.getLangOpts().CPlusPlus11) { Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType(); return false; } else { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } return true; } /// EvaluateInPlace - Evaluate an expression in-place in an APValue. 
In some /// cases, the in-place evaluation is essential, since later initializers for /// an object can indirectly refer to subobjects which were initialized earlier. static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, const Expr *E, bool AllowNonLiteralTypes) { assert(!E->isValueDependent()); if (!AllowNonLiteralTypes && !CheckLiteralType(Info, E, &This)) return false; if (E->isRValue()) { // Evaluate arrays and record types in-place, so that later initializers can // refer to earlier-initialized members of the object. QualType T = E->getType(); if (T->isArrayType()) return EvaluateArray(E, This, Result, Info); else if (T->isRecordType()) return EvaluateRecord(E, This, Result, Info); else if (T->isAtomicType()) { QualType Unqual = T.getAtomicUnqualifiedType(); if (Unqual->isArrayType() || Unqual->isRecordType()) return EvaluateAtomic(E, &This, Result, Info); } } // For any other type, in-place evaluation is unimportant. return Evaluate(Result, Info, E); } /// EvaluateAsRValue - Try to evaluate this expression, performing an implicit /// lvalue-to-rvalue cast if it is an lvalue. static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { if (Info.EnableNewConstInterp) { if (!Info.Ctx.getInterpContext().evaluateAsRValue(Info, E, Result)) return false; } else { if (E->getType().isNull()) return false; if (!CheckLiteralType(Info, E)) return false; if (!::Evaluate(Result, Info, E)) return false; if (E->isGLValue()) { LValue LV; LV.setFrom(Info.Ctx, Result); if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) return false; } } // Check this core constant expression is a constant expression. return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result) && CheckMemoryLeaks(Info); } static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, const ASTContext &Ctx, bool &IsConst) { // Fast-path evaluations of integer literals, since we sometimes see files // containing vast quantities of these. if (const IntegerLiteral *L = dyn_cast(Exp)) { Result.Val = APValue(APSInt(L->getValue(), L->getType()->isUnsignedIntegerType())); IsConst = true; return true; } // This case should be rare, but we need to check it before we check on // the type below. if (Exp->getType().isNull()) { IsConst = false; return true; } // FIXME: Evaluating values of large array and record types can cause // performance problems. Only do so in C++11 for now. 
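  // Illustrative example (hypothetical snippet): in C++11 and later an
  // aggregate prvalue such as
  //
  //   struct P { int x, y; };
  //   constexpr P p = P{1, 2};
  //
  // falls through to the full evaluator, whereas in C and C++98 modes record-
  // and array-typed rvalues are reported as non-constant right here without
  // any further evaluation.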
  if (Exp->isRValue() && (Exp->getType()->isArrayType() ||
                          Exp->getType()->isRecordType()) &&
      !Ctx.getLangOpts().CPlusPlus11) {
    IsConst = false;
    return true;
  }
  return false;
}

static bool hasUnacceptableSideEffect(Expr::EvalStatus &Result,
                                      Expr::SideEffectsKind SEK) {
  return (SEK < Expr::SE_AllowSideEffects && Result.HasSideEffects) ||
         (SEK < Expr::SE_AllowUndefinedBehavior && Result.HasUndefinedBehavior);
}

static bool EvaluateAsRValue(const Expr *E, Expr::EvalResult &Result,
                             const ASTContext &Ctx, EvalInfo &Info) {
  bool IsConst;
  if (FastEvaluateAsRValue(E, Result, Ctx, IsConst))
    return IsConst;

  return EvaluateAsRValue(Info, E, Result.Val);
}

static bool EvaluateAsInt(const Expr *E, Expr::EvalResult &ExprResult,
                          const ASTContext &Ctx,
                          Expr::SideEffectsKind AllowSideEffects,
                          EvalInfo &Info) {
  if (!E->getType()->isIntegralOrEnumerationType())
    return false;

  if (!::EvaluateAsRValue(E, ExprResult, Ctx, Info) ||
      !ExprResult.Val.isInt() ||
      hasUnacceptableSideEffect(ExprResult, AllowSideEffects))
    return false;

  return true;
}

static bool EvaluateAsFixedPoint(const Expr *E, Expr::EvalResult &ExprResult,
                                 const ASTContext &Ctx,
                                 Expr::SideEffectsKind AllowSideEffects,
                                 EvalInfo &Info) {
  if (!E->getType()->isFixedPointType())
    return false;

  if (!::EvaluateAsRValue(E, ExprResult, Ctx, Info))
    return false;

  if (!ExprResult.Val.isFixedPoint() ||
      hasUnacceptableSideEffect(ExprResult, AllowSideEffects))
    return false;

  return true;
}

/// EvaluateAsRValue - Return true if this is a constant which we can fold using
/// any crazy technique (that has nothing to do with language standards) that
/// we want to. If this function returns true, it returns the folded constant
/// in Result. If this expression is a glvalue, an lvalue-to-rvalue conversion
/// will be applied to the result.
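// Illustrative usage from a caller of this API (hypothetical snippet; 'E' is
// assumed to be a non-dependent Expr* and 'Ctx' its ASTContext):
//
//   Expr::EvalResult ER;
//   if (E->EvaluateAsRValue(ER, Ctx, /*InConstantContext=*/false) &&
//       ER.Val.isInt()) {
//     llvm::APSInt V = ER.Val.getInt();  // the folded value
//   }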
bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsRValue(this, Result, Ctx, Info); } bool Expr::EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalResult Scratch; return EvaluateAsRValue(Scratch, Ctx, InConstantContext) && HandleConversionToBool(Scratch.Val, Result); } bool Expr::EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsInt(this, Result, Ctx, AllowSideEffects, Info); } bool Expr::EvaluateAsFixedPoint(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = InConstantContext; return ::EvaluateAsFixedPoint(this, Result, Ctx, AllowSideEffects, Info); } bool Expr::EvaluateAsFloat(APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); if (!getType()->isRealFloatingType()) return false; EvalResult ExprResult; if (!EvaluateAsRValue(ExprResult, Ctx, InConstantContext) || !ExprResult.Val.isFloat() || hasUnacceptableSideEffect(ExprResult, AllowSideEffects)) return false; Result = ExprResult.Val.getFloat(); return true; } bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold); Info.InConstantContext = InConstantContext; LValue LV; CheckedTemporaries CheckedTemps; if (!EvaluateLValue(this, LV, Info) || !Info.discardCleanups() || Result.HasSideEffects || !CheckLValueConstantExpression(Info, getExprLoc(), Ctx.getLValueReferenceType(getType()), LV, Expr::EvaluateForCodeGen, CheckedTemps)) return false; LV.moveInto(Result.Val); return true; } bool Expr::EvaluateAsConstantExpr(EvalResult &Result, ConstExprUsage Usage, const ASTContext &Ctx, bool InPlace) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalInfo::EvaluationMode EM = EvalInfo::EM_ConstantExpression; EvalInfo Info(Ctx, Result, EM); Info.InConstantContext = true; if (InPlace) { Info.setEvaluatingDecl(this, Result.Val); LValue LVal; LVal.set(this); if (!::EvaluateInPlace(Result.Val, Info, LVal, this) || Result.HasSideEffects) return false; } else if (!::Evaluate(Result.Val, Info, this) || Result.HasSideEffects) return false; if (!Info.discardCleanups()) llvm_unreachable("Unhandled cleanup; missing full expression marker?"); return CheckConstantExpression(Info, getExprLoc(), getStorageType(Ctx, this), Result.Val, Usage) && CheckMemoryLeaks(Info); } bool Expr::EvaluateAsInitializer(APValue &Value, const 
ASTContext &Ctx, const VarDecl *VD, SmallVectorImpl &Notes) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); // FIXME: Evaluating initializers for large array and record types can cause // performance problems. Only do so in C++11 for now. if (isRValue() && (getType()->isArrayType() || getType()->isRecordType()) && !Ctx.getLangOpts().CPlusPlus11) return false; Expr::EvalStatus EStatus; EStatus.Diag = &Notes; EvalInfo Info(Ctx, EStatus, VD->isConstexpr() ? EvalInfo::EM_ConstantExpression : EvalInfo::EM_ConstantFold); Info.setEvaluatingDecl(VD, Value); Info.InConstantContext = true; SourceLocation DeclLoc = VD->getLocation(); QualType DeclTy = VD->getType(); if (Info.EnableNewConstInterp) { auto &InterpCtx = const_cast(Ctx).getInterpContext(); if (!InterpCtx.evaluateAsInitializer(Info, VD, Value)) return false; } else { LValue LVal; LVal.set(VD); if (!EvaluateInPlace(Value, Info, LVal, this, /*AllowNonLiteralTypes=*/true) || EStatus.HasSideEffects) return false; // At this point, any lifetime-extended temporaries are completely // initialized. Info.performLifetimeExtension(); if (!Info.discardCleanups()) llvm_unreachable("Unhandled cleanup; missing full expression marker?"); } return CheckConstantExpression(Info, DeclLoc, DeclTy, Value) && CheckMemoryLeaks(Info); } bool VarDecl::evaluateDestruction( SmallVectorImpl &Notes) const { Expr::EvalStatus EStatus; EStatus.Diag = &Notes; // Make a copy of the value for the destructor to mutate, if we know it. // Otherwise, treat the value as default-initialized; if the destructor works // anyway, then the destruction is constant (and must be essentially empty). APValue DestroyedValue; if (getEvaluatedValue() && !getEvaluatedValue()->isAbsent()) DestroyedValue = *getEvaluatedValue(); else if (!getDefaultInitValue(getType(), DestroyedValue)) return false; EvalInfo Info(getASTContext(), EStatus, EvalInfo::EM_ConstantExpression); Info.setEvaluatingDecl(this, DestroyedValue, EvalInfo::EvaluatingDeclKind::Dtor); Info.InConstantContext = true; SourceLocation DeclLoc = getLocation(); QualType DeclTy = getType(); LValue LVal; LVal.set(this); if (!HandleDestruction(Info, DeclLoc, LVal.Base, DestroyedValue, DeclTy) || EStatus.HasSideEffects) return false; if (!Info.discardCleanups()) llvm_unreachable("Unhandled cleanup; missing full expression marker?"); ensureEvaluatedStmt()->HasConstantDestruction = true; return true; } /// isEvaluatable - Call EvaluateAsRValue to see if this expression can be /// constant folded, but discard the result. 
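// Illustrative usage (hypothetical snippet; 'E' and 'Ctx' as in the example
// above, assuming the default SideEffectsKind parameter):
//
//   if (E->isEvaluatable(Ctx)) {
//     // E folds to a constant and has no unacceptable side effects.
//   }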
bool Expr::isEvaluatable(const ASTContext &Ctx, SideEffectsKind SEK) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalResult Result; return EvaluateAsRValue(Result, Ctx, /* in constant context */ true) && !hasUnacceptableSideEffect(Result, SEK); } APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl *Diag) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalResult EVResult; EVResult.Diag = Diag; EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = true; bool Result = ::EvaluateAsRValue(this, EVResult, Ctx, Info); (void)Result; assert(Result && "Could not evaluate expression"); assert(EVResult.Val.isInt() && "Expression did not evaluate to integer"); return EVResult.Val.getInt(); } APSInt Expr::EvaluateKnownConstIntCheckOverflow( const ASTContext &Ctx, SmallVectorImpl *Diag) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); EvalResult EVResult; EVResult.Diag = Diag; EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = true; Info.CheckingForUndefinedBehavior = true; bool Result = ::EvaluateAsRValue(Info, this, EVResult.Val); (void)Result; assert(Result && "Could not evaluate expression"); assert(EVResult.Val.isInt() && "Expression did not evaluate to integer"); return EVResult.Val.getInt(); } void Expr::EvaluateForOverflow(const ASTContext &Ctx) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); bool IsConst; EvalResult EVResult; if (!FastEvaluateAsRValue(this, EVResult, Ctx, IsConst)) { EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); Info.CheckingForUndefinedBehavior = true; (void)::EvaluateAsRValue(Info, this, EVResult.Val); } } bool Expr::EvalResult::isGlobalLValue() const { assert(Val.isLValue()); return IsGlobalLValue(Val.getLValueBase()); } /// isIntegerConstantExpr - this recursive routine will test if an expression is /// an integer constant expression. /// FIXME: Pass up a reason why! Invalid operation in i-c-e, division by zero, /// comma, etc // CheckICE - This function does the fundamental ICE checking: the returned // ICEDiag contains an ICEKind indicating whether the expression is an ICE, // and a (possibly null) SourceLocation indicating the location of the problem. // // Note that to reduce code duplication, this helper does no evaluation // itself; the caller checks whether the expression is evaluatable, and // in the rare cases where CheckICE actually cares about the evaluated // value, it calls into Evaluate. namespace { enum ICEKind { /// This expression is an ICE. IK_ICE, /// This expression is not an ICE, but if it isn't evaluated, it's /// a legal subexpression for an ICE. This return value is used to handle /// the comma operator in C99 mode, and non-constant subexpressions. IK_ICEIfUnevaluated, /// This expression is not an ICE, and is not a legal subexpression for one. IK_NotICE }; struct ICEDiag { ICEKind Kind; SourceLocation Loc; ICEDiag(ICEKind IK, SourceLocation l) : Kind(IK), Loc(l) {} }; } static ICEDiag NoDiag() { return ICEDiag(IK_ICE, SourceLocation()); } static ICEDiag Worst(ICEDiag A, ICEDiag B) { return A.Kind >= B.Kind ? 
A : B; } static ICEDiag CheckEvalInICE(const Expr* E, const ASTContext &Ctx) { Expr::EvalResult EVResult; Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); Info.InConstantContext = true; if (!::EvaluateAsRValue(E, EVResult, Ctx, Info) || EVResult.HasSideEffects || !EVResult.Val.isInt()) return ICEDiag(IK_NotICE, E->getBeginLoc()); return NoDiag(); } static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { assert(!E->isValueDependent() && "Should not see value dependent exprs!"); if (!E->getType()->isIntegralOrEnumerationType()) return ICEDiag(IK_NotICE, E->getBeginLoc()); switch (E->getStmtClass()) { #define ABSTRACT_STMT(Node) #define STMT(Node, Base) case Expr::Node##Class: #define EXPR(Node, Base) #include "clang/AST/StmtNodes.inc" case Expr::PredefinedExprClass: case Expr::FloatingLiteralClass: case Expr::ImaginaryLiteralClass: case Expr::StringLiteralClass: case Expr::ArraySubscriptExprClass: case Expr::MatrixSubscriptExprClass: case Expr::OMPArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::MemberExprClass: case Expr::CompoundAssignOperatorClass: case Expr::CompoundLiteralExprClass: case Expr::ExtVectorElementExprClass: case Expr::DesignatedInitExprClass: case Expr::ArrayInitLoopExprClass: case Expr::ArrayInitIndexExprClass: case Expr::NoInitExprClass: case Expr::DesignatedInitUpdateExprClass: case Expr::ImplicitValueInitExprClass: case Expr::ParenListExprClass: case Expr::VAArgExprClass: case Expr::AddrLabelExprClass: case Expr::StmtExprClass: case Expr::CXXMemberCallExprClass: case Expr::CUDAKernelCallExprClass: case Expr::CXXAddrspaceCastExprClass: case Expr::CXXDynamicCastExprClass: case Expr::CXXTypeidExprClass: case Expr::CXXUuidofExprClass: case Expr::MSPropertyRefExprClass: case Expr::MSPropertySubscriptExprClass: case Expr::CXXNullPtrLiteralExprClass: case Expr::UserDefinedLiteralClass: case Expr::CXXThisExprClass: case Expr::CXXThrowExprClass: case Expr::CXXNewExprClass: case Expr::CXXDeleteExprClass: case Expr::CXXPseudoDestructorExprClass: case Expr::UnresolvedLookupExprClass: case Expr::TypoExprClass: case Expr::RecoveryExprClass: case Expr::DependentScopeDeclRefExprClass: case Expr::CXXConstructExprClass: case Expr::CXXInheritedCtorInitExprClass: case Expr::CXXStdInitializerListExprClass: case Expr::CXXBindTemporaryExprClass: case Expr::ExprWithCleanupsClass: case Expr::CXXTemporaryObjectExprClass: case Expr::CXXUnresolvedConstructExprClass: case Expr::CXXDependentScopeMemberExprClass: case Expr::UnresolvedMemberExprClass: case Expr::ObjCStringLiteralClass: case Expr::ObjCBoxedExprClass: case Expr::ObjCArrayLiteralClass: case Expr::ObjCDictionaryLiteralClass: case Expr::ObjCEncodeExprClass: case Expr::ObjCMessageExprClass: case Expr::ObjCSelectorExprClass: case Expr::ObjCProtocolExprClass: case Expr::ObjCIvarRefExprClass: case Expr::ObjCPropertyRefExprClass: case Expr::ObjCSubscriptRefExprClass: case Expr::ObjCIsaExprClass: case Expr::ObjCAvailabilityCheckExprClass: case Expr::ShuffleVectorExprClass: case Expr::ConvertVectorExprClass: case Expr::BlockExprClass: case Expr::NoStmtClass: case Expr::OpaqueValueExprClass: case Expr::PackExpansionExprClass: case Expr::SubstNonTypeTemplateParmPackExprClass: case Expr::FunctionParmPackExprClass: case Expr::AsTypeExprClass: case Expr::ObjCIndirectCopyRestoreExprClass: case Expr::MaterializeTemporaryExprClass: case Expr::PseudoObjectExprClass: case Expr::AtomicExprClass: case Expr::LambdaExprClass: case Expr::CXXFoldExprClass: case 
Expr::CoawaitExprClass: case Expr::DependentCoawaitExprClass: case Expr::CoyieldExprClass: return ICEDiag(IK_NotICE, E->getBeginLoc()); case Expr::InitListExprClass: { // C++03 [dcl.init]p13: If T is a scalar type, then a declaration of the // form "T x = { a };" is equivalent to "T x = a;". // Unless we're initializing a reference, T is a scalar as it is known to be // of integral or enumeration type. if (E->isRValue()) if (cast(E)->getNumInits() == 1) return CheckICE(cast(E)->getInit(0), Ctx); return ICEDiag(IK_NotICE, E->getBeginLoc()); } case Expr::SizeOfPackExprClass: case Expr::GNUNullExprClass: case Expr::SourceLocExprClass: return NoDiag(); case Expr::SubstNonTypeTemplateParmExprClass: return CheckICE(cast(E)->getReplacement(), Ctx); case Expr::ConstantExprClass: return CheckICE(cast(E)->getSubExpr(), Ctx); case Expr::ParenExprClass: return CheckICE(cast(E)->getSubExpr(), Ctx); case Expr::GenericSelectionExprClass: return CheckICE(cast(E)->getResultExpr(), Ctx); case Expr::IntegerLiteralClass: case Expr::FixedPointLiteralClass: case Expr::CharacterLiteralClass: case Expr::ObjCBoolLiteralExprClass: case Expr::CXXBoolLiteralExprClass: case Expr::CXXScalarValueInitExprClass: case Expr::TypeTraitExprClass: case Expr::ConceptSpecializationExprClass: case Expr::RequiresExprClass: case Expr::ArrayTypeTraitExprClass: case Expr::ExpressionTraitExprClass: case Expr::CXXNoexceptExprClass: return NoDiag(); case Expr::CallExprClass: case Expr::CXXOperatorCallExprClass: { // C99 6.6/3 allows function calls within unevaluated subexpressions of // constant expressions, but they can never be ICEs because an ICE cannot // contain an operand of (pointer to) function type. const CallExpr *CE = cast(E); if (CE->getBuiltinCallee()) return CheckEvalInICE(E, Ctx); return ICEDiag(IK_NotICE, E->getBeginLoc()); } case Expr::CXXRewrittenBinaryOperatorClass: return CheckICE(cast(E)->getSemanticForm(), Ctx); case Expr::DeclRefExprClass: { if (isa(cast(E)->getDecl())) return NoDiag(); const ValueDecl *D = cast(E)->getDecl(); if (Ctx.getLangOpts().CPlusPlus && D && IsConstNonVolatile(D->getType())) { // Parameter variables are never constants. Without this check, // getAnyInitializer() can find a default argument, which leads // to chaos. if (isa(D)) return ICEDiag(IK_NotICE, cast(E)->getLocation()); // C++ 7.1.5.1p2 // A variable of non-volatile const-qualified integral or enumeration // type initialized by an ICE can be used in ICEs. if (const VarDecl *Dcl = dyn_cast(D)) { if (!Dcl->getType()->isIntegralOrEnumerationType()) return ICEDiag(IK_NotICE, cast(E)->getLocation()); const VarDecl *VD; // Look for a declaration of this variable that has an initializer, and // check whether it is an ICE. if (Dcl->getAnyInitializer(VD) && VD->checkInitIsICE()) return NoDiag(); else return ICEDiag(IK_NotICE, cast(E)->getLocation()); } } return ICEDiag(IK_NotICE, E->getBeginLoc()); } case Expr::UnaryOperatorClass: { const UnaryOperator *Exp = cast(E); switch (Exp->getOpcode()) { case UO_PostInc: case UO_PostDec: case UO_PreInc: case UO_PreDec: case UO_AddrOf: case UO_Deref: case UO_Coawait: // C99 6.6/3 allows increment and decrement within unevaluated // subexpressions of constant expressions, but they can never be ICEs // because an ICE cannot contain an lvalue operand. 
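      // Illustrative example (hypothetical snippet, given a file-scope
      // 'int n;'):
      //
      //   enum { A = n++ };          // not an ICE: '++' has an evaluated lvalue operand
      //   enum { B = sizeof(n++) };  // OK: the operand of sizeof is never evaluated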
return ICEDiag(IK_NotICE, E->getBeginLoc()); case UO_Extension: case UO_LNot: case UO_Plus: case UO_Minus: case UO_Not: case UO_Real: case UO_Imag: return CheckICE(Exp->getSubExpr(), Ctx); } llvm_unreachable("invalid unary operator class"); } case Expr::OffsetOfExprClass: { // Note that per C99, offsetof must be an ICE. And AFAIK, using // EvaluateAsRValue matches the proposed gcc behavior for cases like // "offsetof(struct s{int x[4];}, x[1.0])". This doesn't affect // compliance: we should warn earlier for offsetof expressions with // array subscripts that aren't ICEs, and if the array subscripts // are ICEs, the value of the offsetof must be an integer constant. return CheckEvalInICE(E, Ctx); } case Expr::UnaryExprOrTypeTraitExprClass: { const UnaryExprOrTypeTraitExpr *Exp = cast(E); if ((Exp->getKind() == UETT_SizeOf) && Exp->getTypeOfArgument()->isVariableArrayType()) return ICEDiag(IK_NotICE, E->getBeginLoc()); return NoDiag(); } case Expr::BinaryOperatorClass: { const BinaryOperator *Exp = cast(E); switch (Exp->getOpcode()) { case BO_PtrMemD: case BO_PtrMemI: case BO_Assign: case BO_MulAssign: case BO_DivAssign: case BO_RemAssign: case BO_AddAssign: case BO_SubAssign: case BO_ShlAssign: case BO_ShrAssign: case BO_AndAssign: case BO_XorAssign: case BO_OrAssign: // C99 6.6/3 allows assignments within unevaluated subexpressions of // constant expressions, but they can never be ICEs because an ICE cannot // contain an lvalue operand. return ICEDiag(IK_NotICE, E->getBeginLoc()); case BO_Mul: case BO_Div: case BO_Rem: case BO_Add: case BO_Sub: case BO_Shl: case BO_Shr: case BO_LT: case BO_GT: case BO_LE: case BO_GE: case BO_EQ: case BO_NE: case BO_And: case BO_Xor: case BO_Or: case BO_Comma: case BO_Cmp: { ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx); ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx); if (Exp->getOpcode() == BO_Div || Exp->getOpcode() == BO_Rem) { // EvaluateAsRValue gives an error for undefined Div/Rem, so make sure // we don't evaluate one. if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE) { llvm::APSInt REval = Exp->getRHS()->EvaluateKnownConstInt(Ctx); if (REval == 0) return ICEDiag(IK_ICEIfUnevaluated, E->getBeginLoc()); if (REval.isSigned() && REval.isAllOnesValue()) { llvm::APSInt LEval = Exp->getLHS()->EvaluateKnownConstInt(Ctx); if (LEval.isMinSignedValue()) return ICEDiag(IK_ICEIfUnevaluated, E->getBeginLoc()); } } } if (Exp->getOpcode() == BO_Comma) { if (Ctx.getLangOpts().C99) { // C99 6.6p3 introduces a strange edge case: comma can be in an ICE // if it isn't evaluated. if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE) return ICEDiag(IK_ICEIfUnevaluated, E->getBeginLoc()); } else { // In both C89 and C++, commas in ICEs are illegal. return ICEDiag(IK_NotICE, E->getBeginLoc()); } } return Worst(LHSResult, RHSResult); } case BO_LAnd: case BO_LOr: { ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx); ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx); if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICEIfUnevaluated) { // Rare case where the RHS has a comma "side-effect"; we need // to actually check the condition to see whether the side // with the comma is evaluated. 
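        // Illustrative example (hypothetical snippet, C99 mode):
        //
        //   enum { A = 0 && (1, 2) };  // ICE: the comma operand is never evaluated
        //   enum { B = 1 && (1, 2) };  // not an ICE: the comma operand would be evaluated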
if ((Exp->getOpcode() == BO_LAnd) != (Exp->getLHS()->EvaluateKnownConstInt(Ctx) == 0)) return RHSResult; return NoDiag(); } return Worst(LHSResult, RHSResult); } } llvm_unreachable("invalid binary operator kind"); } case Expr::ImplicitCastExprClass: case Expr::CStyleCastExprClass: case Expr::CXXFunctionalCastExprClass: case Expr::CXXStaticCastExprClass: case Expr::CXXReinterpretCastExprClass: case Expr::CXXConstCastExprClass: case Expr::ObjCBridgedCastExprClass: { const Expr *SubExpr = cast(E)->getSubExpr(); if (isa(E)) { if (const FloatingLiteral *FL = dyn_cast(SubExpr->IgnoreParenImpCasts())) { unsigned DestWidth = Ctx.getIntWidth(E->getType()); bool DestSigned = E->getType()->isSignedIntegerOrEnumerationType(); APSInt IgnoredVal(DestWidth, !DestSigned); bool Ignored; // If the value does not fit in the destination type, the behavior is // undefined, so we are not required to treat it as a constant // expression. if (FL->getValue().convertToInteger(IgnoredVal, llvm::APFloat::rmTowardZero, &Ignored) & APFloat::opInvalidOp) return ICEDiag(IK_NotICE, E->getBeginLoc()); return NoDiag(); } } switch (cast(E)->getCastKind()) { case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: case CK_NoOp: case CK_IntegralToBoolean: case CK_IntegralCast: return CheckICE(SubExpr, Ctx); default: return ICEDiag(IK_NotICE, E->getBeginLoc()); } } case Expr::BinaryConditionalOperatorClass: { const BinaryConditionalOperator *Exp = cast(E); ICEDiag CommonResult = CheckICE(Exp->getCommon(), Ctx); if (CommonResult.Kind == IK_NotICE) return CommonResult; ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx); if (FalseResult.Kind == IK_NotICE) return FalseResult; if (CommonResult.Kind == IK_ICEIfUnevaluated) return CommonResult; if (FalseResult.Kind == IK_ICEIfUnevaluated && Exp->getCommon()->EvaluateKnownConstInt(Ctx) != 0) return NoDiag(); return FalseResult; } case Expr::ConditionalOperatorClass: { const ConditionalOperator *Exp = cast(E); // If the condition (ignoring parens) is a __builtin_constant_p call, // then only the true side is actually considered in an integer constant // expression, and it is fully evaluated. This is an important GNU // extension. See GCC PR38377 for discussion. if (const CallExpr *CallCE = dyn_cast(Exp->getCond()->IgnoreParenCasts())) if (CallCE->getBuiltinCallee() == Builtin::BI__builtin_constant_p) return CheckEvalInICE(E, Ctx); ICEDiag CondResult = CheckICE(Exp->getCond(), Ctx); if (CondResult.Kind == IK_NotICE) return CondResult; ICEDiag TrueResult = CheckICE(Exp->getTrueExpr(), Ctx); ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx); if (TrueResult.Kind == IK_NotICE) return TrueResult; if (FalseResult.Kind == IK_NotICE) return FalseResult; if (CondResult.Kind == IK_ICEIfUnevaluated) return CondResult; if (TrueResult.Kind == IK_ICE && FalseResult.Kind == IK_ICE) return NoDiag(); // Rare case where the diagnostics depend on which side is evaluated // Note that if we get here, CondResult is 0, and at least one of // TrueResult and FalseResult is non-zero. 
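    // Illustrative example (hypothetical snippet): per the logic below, only
    // the evaluated arm matters:
    //
    //   enum { A = 1 ? 2 : 1/0 };  // ICE: the 1/0 arm is never evaluated
    //   enum { B = 0 ? 1/0 : 2 };  // likewise an ICE: the 1/0 arm is dead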
if (Exp->getCond()->EvaluateKnownConstInt(Ctx) == 0) return FalseResult; return TrueResult; } case Expr::CXXDefaultArgExprClass: return CheckICE(cast(E)->getExpr(), Ctx); case Expr::CXXDefaultInitExprClass: return CheckICE(cast(E)->getExpr(), Ctx); case Expr::ChooseExprClass: { return CheckICE(cast(E)->getChosenSubExpr(), Ctx); } case Expr::BuiltinBitCastExprClass: { if (!checkBitCastConstexprEligibility(nullptr, Ctx, cast(E))) return ICEDiag(IK_NotICE, E->getBeginLoc()); return CheckICE(cast(E)->getSubExpr(), Ctx); } } llvm_unreachable("Invalid StmtClass!"); } /// Evaluate an expression as a C++11 integral constant expression. static bool EvaluateCPlusPlus11IntegralConstantExpr(const ASTContext &Ctx, const Expr *E, llvm::APSInt *Value, SourceLocation *Loc) { if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { if (Loc) *Loc = E->getExprLoc(); return false; } APValue Result; if (!E->isCXX11ConstantExpr(Ctx, &Result, Loc)) return false; if (!Result.isInt()) { if (Loc) *Loc = E->getExprLoc(); return false; } if (Value) *Value = Result.getInt(); return true; } bool Expr::isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); if (Ctx.getLangOpts().CPlusPlus11) return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, nullptr, Loc); ICEDiag D = CheckICE(this, Ctx); if (D.Kind != IK_ICE) { if (Loc) *Loc = D.Loc; return false; } return true; } bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx, SourceLocation *Loc, bool isEvaluated) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); if (Ctx.getLangOpts().CPlusPlus11) return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc); if (!isIntegerConstantExpr(Ctx, Loc)) return false; // The only possible side-effects here are due to UB discovered in the // evaluation (for instance, INT_MAX + 1). In such a case, we are still // required to treat the expression as an ICE, so we produce the folded // value. EvalResult ExprResult; Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = true; if (!::EvaluateAsInt(this, ExprResult, Ctx, SE_AllowSideEffects, Info)) llvm_unreachable("ICE cannot be evaluated!"); Value = ExprResult.Val.getInt(); return true; } bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); return CheckICE(this, Ctx).Kind == IK_ICE; } bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, SourceLocation *Loc) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); // We support this checking in C++98 mode in order to diagnose compatibility // issues. assert(Ctx.getLangOpts().CPlusPlus); // Build evaluation settings. Expr::EvalStatus Status; SmallVector Diags; Status.Diag = &Diags; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); APValue Scratch; bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch) && // FIXME: We don't produce a diagnostic for this, but the callers that // call us on arbitrary full-expressions should generally not care. Info.discardCleanups() && !Status.HasSideEffects; if (!Diags.empty()) { IsConstExpr = false; if (Loc) *Loc = Diags[0].first; } else if (!IsConstExpr) { // FIXME: This shouldn't happen. 
if (Loc) *Loc = getExprLoc(); } return IsConstExpr; } bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, const FunctionDecl *Callee, ArrayRef Args, const Expr *This) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated); Info.InConstantContext = true; LValue ThisVal; const LValue *ThisPtr = nullptr; if (This) { #ifndef NDEBUG auto *MD = dyn_cast(Callee); assert(MD && "Don't provide `this` for non-methods."); assert(!MD->isStatic() && "Don't provide `this` for static methods."); #endif if (!This->isValueDependent() && EvaluateObjectArgument(Info, This, ThisVal) && !Info.EvalStatus.HasSideEffects) ThisPtr = &ThisVal; // Ignore any side-effects from a failed evaluation. This is safe because // they can't interfere with any other argument evaluation. Info.EvalStatus.HasSideEffects = false; } ArgVector ArgValues(Args.size()); for (ArrayRef::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { if ((*I)->isValueDependent() || !Evaluate(ArgValues[I - Args.begin()], Info, *I) || Info.EvalStatus.HasSideEffects) // If evaluation fails, throw away the argument entirely. ArgValues[I - Args.begin()] = APValue(); // Ignore any side-effects from a failed evaluation. This is safe because // they can't interfere with any other argument evaluation. Info.EvalStatus.HasSideEffects = false; } // Parameter cleanups happen in the caller and are not part of this // evaluation. Info.discardCleanups(); Info.EvalStatus.HasSideEffects = false; // Build fake call to Callee. CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr, ArgValues.data()); // FIXME: Missing ExprWithCleanups in enable_if conditions? FullExpressionRAII Scope(Info); return Evaluate(Value, Info, this) && Scope.destroy() && !Info.EvalStatus.HasSideEffects; } bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, SmallVectorImpl< PartialDiagnosticAt> &Diags) { // FIXME: It would be useful to check constexpr function templates, but at the // moment the constant expression evaluator cannot cope with the non-rigorous // ASTs which we build for dependent expressions. if (FD->isDependentContext()) return true; // Bail out if a constexpr constructor has an initializer that contains an // error. We deliberately don't produce a diagnostic, as we have produced a // relevant diagnostic when parsing the error initializer. if (const auto *Ctor = dyn_cast(FD)) { for (const auto *InitExpr : Ctor->inits()) { if (InitExpr->getInit() && InitExpr->getInit()->containsErrors()) return false; } } Expr::EvalStatus Status; Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_ConstantExpression); Info.InConstantContext = true; Info.CheckingPotentialConstantExpression = true; // The constexpr VM attempts to compile all methods to bytecode here. if (Info.EnableNewConstInterp) { Info.Ctx.getInterpContext().isPotentialConstantExpr(Info, FD); return Diags.empty(); } const CXXMethodDecl *MD = dyn_cast(FD); const CXXRecordDecl *RD = MD ? MD->getParent()->getCanonicalDecl() : nullptr; // Fabricate an arbitrary expression on the stack and pretend that it // is a temporary being used as the 'this' pointer. LValue This; ImplicitValueInitExpr VIE(RD ? 
Info.Ctx.getRecordType(RD) : Info.Ctx.IntTy); This.set({&VIE, Info.CurrentCall->Index}); ArrayRef Args; APValue Scratch; if (const CXXConstructorDecl *CD = dyn_cast(FD)) { // Evaluate the call as a constant initializer, to allow the construction // of objects of non-literal types. Info.setEvaluatingDecl(This.getLValueBase(), Scratch); HandleConstructorCall(&VIE, This, Args, CD, Info, Scratch); } else { SourceLocation Loc = FD->getLocation(); HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : nullptr, Args, FD->getBody(), Info, Scratch, nullptr); } return Diags.empty(); } bool Expr::isPotentialConstantExprUnevaluated(Expr *E, const FunctionDecl *FD, SmallVectorImpl< PartialDiagnosticAt> &Diags) { assert(!E->isValueDependent() && "Expression evaluator can't be called on a dependent expression."); Expr::EvalStatus Status; Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_ConstantExpressionUnevaluated); Info.InConstantContext = true; Info.CheckingPotentialConstantExpression = true; // Fabricate a call stack frame to give the arguments a plausible cover story. ArrayRef Args; ArgVector ArgValues(0); bool Success = EvaluateArgs(Args, ArgValues, Info, FD); (void)Success; assert(Success && "Failed to set up arguments for potential constant evaluation"); CallStackFrame Frame(Info, SourceLocation(), FD, nullptr, ArgValues.data()); APValue ResultScratch; Evaluate(ResultScratch, Info, E); return Diags.empty(); } bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const { if (!getType()->isPointerType()) return false; Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); } diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp index eef4ab16ec15..117eb598bd5e 100644 --- a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1,4686 +1,4688 @@ //===- ASTReaderDecl.cpp - Decl Deserialization ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the ASTReader::readDeclRecord method, which is the // entrypoint for loading a decl. 
// //===----------------------------------------------------------------------===// #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/AttrIterator.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/Redeclarable.h" #include "clang/AST/Stmt.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/AST/UnresolvedSet.h" #include "clang/Basic/AttrKinds.h" #include "clang/Basic/ExceptionSpecificationType.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/Lambda.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Linkage.h" #include "clang/Basic/Module.h" #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "clang/Sema/IdentifierResolver.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTRecordReader.h" #include "clang/Serialization/ContinuousRangeMap.h" #include "clang/Serialization/ModuleFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include #include #include #include #include #include using namespace clang; using namespace serialization; //===----------------------------------------------------------------------===// // Declaration deserialization //===----------------------------------------------------------------------===// namespace clang { class ASTDeclReader : public DeclVisitor { ASTReader &Reader; ASTRecordReader &Record; ASTReader::RecordLocation Loc; const DeclID ThisDeclID; const SourceLocation ThisDeclLoc; using RecordData = ASTReader::RecordData; TypeID DeferredTypeID = 0; unsigned AnonymousDeclNumber; GlobalDeclID NamedDeclForTagDecl = 0; IdentifierInfo *TypedefNameForLinkage = nullptr; bool HasPendingBody = false; ///A flag to carry the information for a decl from the entity is /// used. We use it to delay the marking of the canonical decl as used until /// the entire declaration is deserialized and merged. bool IsDeclMarkedUsed = false; uint64_t GetCurrentCursorOffset(); uint64_t ReadLocalOffset() { uint64_t LocalOffset = Record.readInt(); assert(LocalOffset < Loc.Offset && "offset point after current record"); return LocalOffset ? Loc.Offset - LocalOffset : 0; } uint64_t ReadGlobalOffset() { uint64_t Local = ReadLocalOffset(); return Local ? 
Record.getGlobalBitOffset(Local) : 0; } SourceLocation readSourceLocation() { return Record.readSourceLocation(); } SourceRange readSourceRange() { return Record.readSourceRange(); } TypeSourceInfo *readTypeSourceInfo() { return Record.readTypeSourceInfo(); } serialization::DeclID readDeclID() { return Record.readDeclID(); } std::string readString() { return Record.readString(); } void readDeclIDList(SmallVectorImpl &IDs) { for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I) IDs.push_back(readDeclID()); } Decl *readDecl() { return Record.readDecl(); } template T *readDeclAs() { return Record.readDeclAs(); } serialization::SubmoduleID readSubmoduleID() { if (Record.getIdx() == Record.size()) return 0; return Record.getGlobalSubmoduleID(Record.readInt()); } Module *readModule() { return Record.getSubmodule(readSubmoduleID()); } void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update); void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D); void MergeDefinitionData(CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data); void MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data); void MergeDefinitionData(ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD); static DeclContext *getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC); static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index); static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D); /// Results from loading a RedeclarableDecl. class RedeclarableResult { Decl *MergeWith; GlobalDeclID FirstID; bool IsKeyDecl; public: RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl) : MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {} /// Retrieve the first ID. GlobalDeclID getFirstID() const { return FirstID; } /// Is this declaration a key declaration? bool isKeyDecl() const { return IsKeyDecl; } /// Get a known declaration that this should be merged with, if /// any. Decl *getKnownMergeTarget() const { return MergeWith; } }; /// Class used to capture the result of searching for an existing /// declaration of a specific kind and name, along with the ability /// to update the place where this result was found (the declaration /// chain hanging off an identifier or the DeclContext we searched in) /// if requested. 
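// Illustrative sketch, not part of the patch: the backwards-delta encoding
// that ReadLocalOffset()/ReadGlobalOffset() above decode. A record stores the
// distance back from its own position, with 0 reserved for "nothing here".
// encodeLocalOffset/decodeLocalOffset are hypothetical helper names.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t encodeLocalOffset(uint64_t RecordOffset, uint64_t TargetOffset) {
  return TargetOffset ? RecordOffset - TargetOffset : 0;
}

static uint64_t decodeLocalOffset(uint64_t RecordOffset, uint64_t LocalOffset) {
  assert(LocalOffset < RecordOffset && "offset points after current record");
  return LocalOffset ? RecordOffset - LocalOffset : 0;
}

int main() {
  uint64_t Record = 1000, BodyOffset = 640;
  uint64_t Stored = encodeLocalOffset(Record, BodyOffset);
  std::printf("stored=%llu decoded=%llu\n",
              (unsigned long long)Stored,
              (unsigned long long)decodeLocalOffset(Record, Stored));
}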
class FindExistingResult { ASTReader &Reader; NamedDecl *New = nullptr; NamedDecl *Existing = nullptr; bool AddResult = false; unsigned AnonymousDeclNumber = 0; IdentifierInfo *TypedefNameForLinkage = nullptr; public: FindExistingResult(ASTReader &Reader) : Reader(Reader) {} FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing, unsigned AnonymousDeclNumber, IdentifierInfo *TypedefNameForLinkage) : Reader(Reader), New(New), Existing(Existing), AddResult(true), AnonymousDeclNumber(AnonymousDeclNumber), TypedefNameForLinkage(TypedefNameForLinkage) {} FindExistingResult(FindExistingResult &&Other) : Reader(Other.Reader), New(Other.New), Existing(Other.Existing), AddResult(Other.AddResult), AnonymousDeclNumber(Other.AnonymousDeclNumber), TypedefNameForLinkage(Other.TypedefNameForLinkage) { Other.AddResult = false; } FindExistingResult &operator=(FindExistingResult &&) = delete; ~FindExistingResult(); /// Suppress the addition of this result into the known set of /// names. void suppress() { AddResult = false; } operator NamedDecl*() const { return Existing; } template operator T*() const { return dyn_cast_or_null(Existing); } }; static DeclContext *getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC); FindExistingResult findExisting(NamedDecl *D); public: ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record, ASTReader::RecordLocation Loc, DeclID thisDeclID, SourceLocation ThisDeclLoc) : Reader(Reader), Record(Record), Loc(Loc), ThisDeclID(thisDeclID), ThisDeclLoc(ThisDeclLoc) {} template static void AddLazySpecializations(T *D, SmallVectorImpl& IDs) { if (IDs.empty()) return; // FIXME: We should avoid this pattern of getting the ASTContext. ASTContext &C = D->getASTContext(); auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations; if (auto &Old = LazySpecializations) { IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]); llvm::sort(IDs); IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end()); } auto *Result = new (C) serialization::DeclID[1 + IDs.size()]; *Result = IDs.size(); std::copy(IDs.begin(), IDs.end(), Result + 1); LazySpecializations = Result; } template static Decl *getMostRecentDeclImpl(Redeclarable *D); static Decl *getMostRecentDeclImpl(...); static Decl *getMostRecentDecl(Decl *D); template static void attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon); static void attachPreviousDeclImpl(ASTReader &Reader, ...); static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon); template static void attachLatestDeclImpl(Redeclarable *D, Decl *Latest); static void attachLatestDeclImpl(...); static void attachLatestDecl(Decl *D, Decl *latest); template static void markIncompleteDeclChainImpl(Redeclarable *D); static void markIncompleteDeclChainImpl(...); /// Determine whether this declaration has a pending body. 
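// Illustrative sketch, not part of the patch: the merge step performed by
// AddLazySpecializations above -- append the previously stored IDs, sort,
// drop duplicates, and re-emit a count-prefixed array. Standalone toy using
// std::vector in place of ASTContext allocation; names are hypothetical.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

using DeclID = uint32_t;

static std::vector<DeclID>
mergeLazySpecializations(std::vector<DeclID> New, const std::vector<DeclID> &Old) {
  if (!Old.empty()) {
    // Old[0] holds the element count; the IDs themselves follow it.
    New.insert(New.end(), Old.begin() + 1, Old.begin() + 1 + Old[0]);
    std::sort(New.begin(), New.end());
    New.erase(std::unique(New.begin(), New.end()), New.end());
  }
  std::vector<DeclID> Result;
  Result.push_back(static_cast<DeclID>(New.size()));   // count prefix
  Result.insert(Result.end(), New.begin(), New.end());
  return Result;
}

int main() {
  std::vector<DeclID> Old = {3, 10, 20, 30};            // count, then IDs
  std::vector<DeclID> Merged = mergeLazySpecializations({20, 40}, Old);
  for (DeclID ID : Merged) std::printf("%u ", ID);      // 4 10 20 30 40
  std::printf("\n");
}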
bool hasPendingBody() const { return HasPendingBody; } void ReadFunctionDefinition(FunctionDecl *FD); void Visit(Decl *D); void UpdateDecl(Decl *D, SmallVectorImpl &); static void setNextObjCCategory(ObjCCategoryDecl *Cat, ObjCCategoryDecl *Next) { Cat->NextClassCategory = Next; } void VisitDecl(Decl *D); void VisitPragmaCommentDecl(PragmaCommentDecl *D); void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D); void VisitTranslationUnitDecl(TranslationUnitDecl *TU); void VisitNamedDecl(NamedDecl *ND); void VisitLabelDecl(LabelDecl *LD); void VisitNamespaceDecl(NamespaceDecl *D); void VisitUsingDirectiveDecl(UsingDirectiveDecl *D); void VisitNamespaceAliasDecl(NamespaceAliasDecl *D); void VisitTypeDecl(TypeDecl *TD); RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD); void VisitTypedefDecl(TypedefDecl *TD); void VisitTypeAliasDecl(TypeAliasDecl *TD); void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); RedeclarableResult VisitTagDecl(TagDecl *TD); void VisitEnumDecl(EnumDecl *ED); RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD); void VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); } RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D); void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); } RedeclarableResult VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D); void VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { VisitClassTemplateSpecializationDeclImpl(D); } void VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D); void VisitClassScopeFunctionSpecializationDecl( ClassScopeFunctionSpecializationDecl *D); RedeclarableResult VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D); void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) { VisitVarTemplateSpecializationDeclImpl(D); } void VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D); void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D); void VisitValueDecl(ValueDecl *VD); void VisitEnumConstantDecl(EnumConstantDecl *ECD); void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D); void VisitDeclaratorDecl(DeclaratorDecl *DD); void VisitFunctionDecl(FunctionDecl *FD); void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD); void VisitCXXMethodDecl(CXXMethodDecl *D); void VisitCXXConstructorDecl(CXXConstructorDecl *D); void VisitCXXDestructorDecl(CXXDestructorDecl *D); void VisitCXXConversionDecl(CXXConversionDecl *D); void VisitFieldDecl(FieldDecl *FD); void VisitMSPropertyDecl(MSPropertyDecl *FD); void VisitMSGuidDecl(MSGuidDecl *D); void VisitIndirectFieldDecl(IndirectFieldDecl *FD); RedeclarableResult VisitVarDeclImpl(VarDecl *D); void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); } void VisitImplicitParamDecl(ImplicitParamDecl *PD); void VisitParmVarDecl(ParmVarDecl *PD); void VisitDecompositionDecl(DecompositionDecl *DD); void VisitBindingDecl(BindingDecl *BD); void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D); DeclID VisitTemplateDecl(TemplateDecl *D); void VisitConceptDecl(ConceptDecl *D); void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D); RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D); void VisitClassTemplateDecl(ClassTemplateDecl *D); void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D); void VisitVarTemplateDecl(VarTemplateDecl *D); void VisitFunctionTemplateDecl(FunctionTemplateDecl *D); void 
VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D); void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D); void VisitUsingDecl(UsingDecl *D); void VisitUsingPackDecl(UsingPackDecl *D); void VisitUsingShadowDecl(UsingShadowDecl *D); void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D); void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); void VisitFriendTemplateDecl(FriendTemplateDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitBlockDecl(BlockDecl *BD); void VisitCapturedDecl(CapturedDecl *CD); void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); std::pair VisitDeclContext(DeclContext *DC); template RedeclarableResult VisitRedeclarable(Redeclarable *D); template void mergeRedeclarable(Redeclarable *D, RedeclarableResult &Redecl, DeclID TemplatePatternID = 0); template void mergeRedeclarable(Redeclarable *D, T *Existing, RedeclarableResult &Redecl, DeclID TemplatePatternID = 0); template void mergeMergeable(Mergeable *D); void mergeMergeable(LifetimeExtendedTemporaryDecl *D); void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl); ObjCTypeParamList *ReadObjCTypeParamList(); // FIXME: Reorder according to DeclNodes.td? void VisitObjCMethodDecl(ObjCMethodDecl *D); void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D); void VisitObjCContainerDecl(ObjCContainerDecl *D); void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D); void VisitObjCIvarDecl(ObjCIvarDecl *D); void VisitObjCProtocolDecl(ObjCProtocolDecl *D); void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D); void VisitObjCCategoryDecl(ObjCCategoryDecl *D); void VisitObjCImplDecl(ObjCImplDecl *D); void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D); void VisitObjCImplementationDecl(ObjCImplementationDecl *D); void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D); void VisitObjCPropertyDecl(ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D); void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D); void VisitOMPAllocateDecl(OMPAllocateDecl *D); void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D); void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPRequiresDecl(OMPRequiresDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); }; } // namespace clang namespace { /// Iterator over the redeclarations of a declaration that have already /// been merged into the same redeclaration chain. template class MergedRedeclIterator { DeclT *Start; DeclT *Canonical = nullptr; DeclT *Current = nullptr; public: MergedRedeclIterator() = default; MergedRedeclIterator(DeclT *Start) : Start(Start), Current(Start) {} DeclT *operator*() { return Current; } MergedRedeclIterator &operator++() { if (Current->isFirstDecl()) { Canonical = Current; Current = Current->getMostRecentDecl(); } else Current = Current->getPreviousDecl(); // If we started in the merged portion, we'll reach our start position // eventually. Otherwise, we'll never reach it, but the second declaration // we reached was the canonical declaration, so stop when we see that one // again. 
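// Illustrative sketch, not part of the patch: the traversal implemented by
// MergedRedeclIterator above, modelled on a toy redeclaration chain. Prev
// links newer -> older, the oldest decl is the first (canonical) one, and
// every node knows the most recent decl; all names and types here are
// hypothetical simplifications.
#include <cstdio>
#include <vector>

struct Node {
  const char *Name;
  Node *Prev;        // previous (older) redeclaration, null on the first decl
  Node *MostRecent;  // newest redeclaration in the chain
};

static std::vector<Node *> mergedRedecls(Node *Start) {
  std::vector<Node *> Out;
  Node *Canonical = nullptr;
  for (Node *Cur = Start; Cur;) {
    Out.push_back(Cur);
    Node *Next;
    if (!Cur->Prev) {               // reached the first decl: wrap around
      Canonical = Cur;
      Next = Cur->MostRecent;
    } else {
      Next = Cur->Prev;
    }
    // Stop when we come back to where we started, or when we reach the
    // canonical decl again after wrapping.
    if (Next == Start || Next == Canonical)
      break;
    Cur = Next;
  }
  return Out;
}

int main() {
  Node A{"A", nullptr, nullptr}, B{"B", &A, nullptr}, C{"C", &B, nullptr};
  A.MostRecent = B.MostRecent = C.MostRecent = &C;
  for (Node *N : mergedRedecls(&B))
    std::printf("%s ", N->Name);    // visits B, A, C exactly once
  std::printf("\n");
}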
if (Current == Start || Current == Canonical) Current = nullptr; return *this; } friend bool operator!=(const MergedRedeclIterator &A, const MergedRedeclIterator &B) { return A.Current != B.Current; } }; } // namespace template static llvm::iterator_range> merged_redecls(DeclT *D) { return llvm::make_range(MergedRedeclIterator(D), MergedRedeclIterator()); } uint64_t ASTDeclReader::GetCurrentCursorOffset() { return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset; } void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) { if (Record.readInt()) { Reader.DefinitionSource[FD] = Loc.F->Kind == ModuleKind::MK_MainFile; if (Reader.getContext().getLangOpts().BuildingPCHWithObjectFile && Reader.DeclIsFromPCHWithObjectFile(FD)) Reader.DefinitionSource[FD] = true; } if (auto *CD = dyn_cast(FD)) { CD->setNumCtorInitializers(Record.readInt()); if (CD->getNumCtorInitializers()) CD->CtorInitializers = ReadGlobalOffset(); } // Store the offset of the body so we can lazily load it later. Reader.PendingBodies[FD] = GetCurrentCursorOffset(); HasPendingBody = true; } void ASTDeclReader::Visit(Decl *D) { DeclVisitor::Visit(D); // At this point we have deserialized and merged the decl and it is safe to // update its canonical decl to signal that the entire entity is used. D->getCanonicalDecl()->Used |= IsDeclMarkedUsed; IsDeclMarkedUsed = false; if (auto *DD = dyn_cast(D)) { if (auto *TInfo = DD->getTypeSourceInfo()) Record.readTypeLoc(TInfo->getTypeLoc()); } if (auto *TD = dyn_cast(D)) { // We have a fully initialized TypeDecl. Read its type now. TD->setTypeForDecl(Reader.GetType(DeferredTypeID).getTypePtrOrNull()); // If this is a tag declaration with a typedef name for linkage, it's safe // to load that typedef now. if (NamedDeclForTagDecl) cast(D)->TypedefNameDeclOrQualifier = cast(Reader.GetDecl(NamedDeclForTagDecl)); } else if (auto *ID = dyn_cast(D)) { // if we have a fully initialized TypeDecl, we can safely read its type now. ID->TypeForDecl = Reader.GetType(DeferredTypeID).getTypePtrOrNull(); } else if (auto *FD = dyn_cast(D)) { // FunctionDecl's body was written last after all other Stmts/Exprs. // We only read it if FD doesn't already have a body (e.g., from another // module). // FIXME: Can we diagnose ODR violations somehow? if (Record.readInt()) ReadFunctionDefinition(FD); } } void ASTDeclReader::VisitDecl(Decl *D) { if (D->isTemplateParameter() || D->isTemplateParameterPack() || isa(D) || isa(D)) { // We don't want to deserialize the DeclContext of a template // parameter or of a parameter of a function template immediately. These // entities might be used in the formulation of its DeclContext (for // example, a function parameter can be used in decltype() in trailing // return type of the function). Use the translation unit DeclContext as a // placeholder. GlobalDeclID SemaDCIDForTemplateParmDecl = readDeclID(); GlobalDeclID LexicalDCIDForTemplateParmDecl = readDeclID(); if (!LexicalDCIDForTemplateParmDecl) LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl; Reader.addPendingDeclContextInfo(D, SemaDCIDForTemplateParmDecl, LexicalDCIDForTemplateParmDecl); D->setDeclContext(Reader.getContext().getTranslationUnitDecl()); } else { auto *SemaDC = readDeclAs(); auto *LexicalDC = readDeclAs(); if (!LexicalDC) LexicalDC = SemaDC; DeclContext *MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC); // Avoid calling setLexicalDeclContext() directly because it uses // Decl::getASTContext() internally which is unsafe during derialization. D->setDeclContextsImpl(MergedSemaDC ? 
MergedSemaDC : SemaDC, LexicalDC, Reader.getContext()); } D->setLocation(ThisDeclLoc); D->setInvalidDecl(Record.readInt()); if (Record.readInt()) { // hasAttrs AttrVec Attrs; Record.readAttributes(Attrs); // Avoid calling setAttrs() directly because it uses Decl::getASTContext() // internally which is unsafe during derialization. D->setAttrsImpl(Attrs, Reader.getContext()); } D->setImplicit(Record.readInt()); D->Used = Record.readInt(); IsDeclMarkedUsed |= D->Used; D->setReferenced(Record.readInt()); D->setTopLevelDeclInObjCContainer(Record.readInt()); D->setAccess((AccessSpecifier)Record.readInt()); D->FromASTFile = true; bool ModulePrivate = Record.readInt(); // Determine whether this declaration is part of a (sub)module. If so, it // may not yet be visible. if (unsigned SubmoduleID = readSubmoduleID()) { // Store the owning submodule ID in the declaration. D->setModuleOwnershipKind( ModulePrivate ? Decl::ModuleOwnershipKind::ModulePrivate : Decl::ModuleOwnershipKind::VisibleWhenImported); D->setOwningModuleID(SubmoduleID); if (ModulePrivate) { // Module-private declarations are never visible, so there is no work to // do. } else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) { // If local visibility is being tracked, this declaration will become // hidden and visible as the owning module does. } else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) { // Mark the declaration as visible when its owning module becomes visible. if (Owner->NameVisibility == Module::AllVisible) D->setVisibleDespiteOwningModule(); else Reader.HiddenNamesMap[Owner].push_back(D); } } else if (ModulePrivate) { D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate); } } void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); D->CommentKind = (PragmaMSCommentKind)Record.readInt(); std::string Arg = readString(); memcpy(D->getTrailingObjects(), Arg.data(), Arg.size()); D->getTrailingObjects()[Arg.size()] = '\0'; } void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); std::string Name = readString(); memcpy(D->getTrailingObjects(), Name.data(), Name.size()); D->getTrailingObjects()[Name.size()] = '\0'; D->ValueStart = Name.size() + 1; std::string Value = readString(); memcpy(D->getTrailingObjects() + D->ValueStart, Value.data(), Value.size()); D->getTrailingObjects()[D->ValueStart + Value.size()] = '\0'; } void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) { llvm_unreachable("Translation units are not serialized"); } void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) { VisitDecl(ND); ND->setDeclName(Record.readDeclarationName()); AnonymousDeclNumber = Record.readInt(); } void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) { VisitNamedDecl(TD); TD->setLocStart(readSourceLocation()); // Delay type reading until after we have fully initialized the decl. DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) { RedeclarableResult Redecl = VisitRedeclarable(TD); VisitTypeDecl(TD); TypeSourceInfo *TInfo = readTypeSourceInfo(); if (Record.readInt()) { // isModed QualType modedT = Record.readType(); TD->setModedTypeSourceInfo(TInfo, modedT); } else TD->setTypeSourceInfo(TInfo); // Read and discard the declaration for which this is a typedef name for // linkage, if it exists. 
We cannot rely on our type to pull in this decl, // because it might have been merged with a type from another module and // thus might not refer to our version of the declaration. readDecl(); return Redecl; } void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); mergeRedeclarable(TD, Redecl); } void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); if (auto *Template = readDeclAs()) // Merged when we merge the template. TD->setDescribedAliasTemplate(Template); else mergeRedeclarable(TD, Redecl); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) { RedeclarableResult Redecl = VisitRedeclarable(TD); VisitTypeDecl(TD); TD->IdentifierNamespace = Record.readInt(); TD->setTagKind((TagDecl::TagKind)Record.readInt()); if (!isa(TD)) TD->setCompleteDefinition(Record.readInt()); TD->setEmbeddedInDeclarator(Record.readInt()); TD->setFreeStanding(Record.readInt()); TD->setCompleteDefinitionRequired(Record.readInt()); TD->setBraceRange(readSourceRange()); switch (Record.readInt()) { case 0: break; case 1: { // ExtInfo auto *Info = new (Reader.getContext()) TagDecl::ExtInfo(); Record.readQualifierInfo(*Info); TD->TypedefNameDeclOrQualifier = Info; break; } case 2: // TypedefNameForAnonDecl NamedDeclForTagDecl = readDeclID(); TypedefNameForLinkage = Record.readIdentifier(); break; default: llvm_unreachable("unexpected tag info kind"); } if (!isa(TD)) mergeRedeclarable(TD, Redecl); return Redecl; } void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { VisitTagDecl(ED); if (TypeSourceInfo *TI = readTypeSourceInfo()) ED->setIntegerTypeSourceInfo(TI); else ED->setIntegerType(Record.readType()); ED->setPromotionType(Record.readType()); ED->setNumPositiveBits(Record.readInt()); ED->setNumNegativeBits(Record.readInt()); ED->setScoped(Record.readInt()); ED->setScopedUsingClassTag(Record.readInt()); ED->setFixed(Record.readInt()); ED->setHasODRHash(true); ED->ODRHash = Record.readInt(); // If this is a definition subject to the ODR, and we already have a // definition, merge this one into it. if (ED->isCompleteDefinition() && Reader.getContext().getLangOpts().Modules && Reader.getContext().getLangOpts().CPlusPlus) { EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()]; if (!OldDef) { // This is the first time we've seen an imported definition. Look for a // local definition before deciding that we are the first definition. 
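// Illustrative sketch, not part of the patch: the first-definition-wins
// merging policy for imported enum definitions described above. The first
// complete definition seen for a canonical decl is kept, later ones are
// demoted, and an ODR-hash mismatch is queued as a pending ODR failure.
// All names are hypothetical; std::map stands in for Reader.EnumDefinitions.
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct EnumDef {
  std::string CanonicalName;   // stands in for the canonical decl
  unsigned ODRHash;
  bool IsCompleteDefinition;
};

static std::map<std::string, EnumDef *> EnumDefinitions;
static std::vector<std::pair<EnumDef *, EnumDef *>> PendingOdrFailures;

static void mergeEnumDefinition(EnumDef &ED) {
  EnumDef *&OldDef = EnumDefinitions[ED.CanonicalName];
  if (!OldDef) {
    OldDef = &ED;                          // first definition we have seen
    return;
  }
  ED.IsCompleteDefinition = false;         // demote the duplicate definition
  if (OldDef->ODRHash != ED.ODRHash)
    PendingOdrFailures.push_back({OldDef, &ED});   // diagnosed lazily, later
}

int main() {
  EnumDef A{"E", 0x1234, true}, B{"E", 0x1234, true}, C{"E", 0xbeef, true};
  mergeEnumDefinition(A);
  mergeEnumDefinition(B);
  mergeEnumDefinition(C);
  std::printf("pending ODR failures: %zu\n", PendingOdrFailures.size()); // 1
}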
for (auto *D : merged_redecls(ED->getCanonicalDecl())) { if (!D->isFromASTFile() && D->isCompleteDefinition()) { OldDef = D; break; } } } if (OldDef) { Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef)); ED->setCompleteDefinition(false); Reader.mergeDefinitionVisibility(OldDef, ED); if (OldDef->getODRHash() != ED->getODRHash()) Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED); } else { OldDef = ED; } } if (auto *InstED = readDeclAs()) { auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK); ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) { RedeclarableResult Redecl = VisitTagDecl(RD); RD->setHasFlexibleArrayMember(Record.readInt()); RD->setAnonymousStructOrUnion(Record.readInt()); RD->setHasObjectMember(Record.readInt()); RD->setHasVolatileMember(Record.readInt()); RD->setNonTrivialToPrimitiveDefaultInitialize(Record.readInt()); RD->setNonTrivialToPrimitiveCopy(Record.readInt()); RD->setNonTrivialToPrimitiveDestroy(Record.readInt()); RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(Record.readInt()); RD->setHasNonTrivialToPrimitiveDestructCUnion(Record.readInt()); RD->setHasNonTrivialToPrimitiveCopyCUnion(Record.readInt()); RD->setParamDestroyedInCallee(Record.readInt()); RD->setArgPassingRestrictions((RecordDecl::ArgPassingKind)Record.readInt()); return Redecl; } void ASTDeclReader::VisitValueDecl(ValueDecl *VD) { VisitNamedDecl(VD); // For function declarations, defer reading the type in case the function has // a deduced return type that references an entity declared within the // function. if (isa(VD)) DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); else VD->setType(Record.readType()); } void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) { VisitValueDecl(ECD); if (Record.readInt()) ECD->setInitExpr(Record.readExpr()); ECD->setInitVal(Record.readAPSInt()); mergeMergeable(ECD); } void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) { VisitValueDecl(DD); DD->setInnerLocStart(readSourceLocation()); if (Record.readInt()) { // hasExtInfo auto *Info = new (Reader.getContext()) DeclaratorDecl::ExtInfo(); Record.readQualifierInfo(*Info); Info->TrailingRequiresClause = Record.readExpr(); DD->DeclInfo = Info; } QualType TSIType = Record.readType(); DD->setTypeSourceInfo( TSIType.isNull() ? nullptr : Reader.getContext().CreateTypeSourceInfo(TSIType)); } void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { RedeclarableResult Redecl = VisitRedeclarable(FD); VisitDeclaratorDecl(FD); // Attach a type to this function. Use the real type if possible, but fall // back to the type as written if it involves a deduced return type. if (FD->getTypeSourceInfo() && FD->getTypeSourceInfo()->getType()->castAs() ->getReturnType()->getContainedAutoType()) { // We'll set up the real type in Visit, once we've finished loading the // function. FD->setType(FD->getTypeSourceInfo()->getType()); Reader.PendingFunctionTypes.push_back({FD, DeferredTypeID}); } else { FD->setType(Reader.GetType(DeferredTypeID)); } DeferredTypeID = 0; FD->DNLoc = Record.readDeclarationNameLoc(FD->getDeclName()); FD->IdentifierNamespace = Record.readInt(); // FunctionDecl's body is handled last at ASTDeclReader::Visit, // after everything else is read. 
FD->setStorageClass(static_cast(Record.readInt())); FD->setInlineSpecified(Record.readInt()); FD->setImplicitlyInline(Record.readInt()); FD->setVirtualAsWritten(Record.readInt()); FD->setPure(Record.readInt()); FD->setHasInheritedPrototype(Record.readInt()); FD->setHasWrittenPrototype(Record.readInt()); FD->setDeletedAsWritten(Record.readInt()); FD->setTrivial(Record.readInt()); FD->setTrivialForCall(Record.readInt()); FD->setDefaulted(Record.readInt()); FD->setExplicitlyDefaulted(Record.readInt()); FD->setHasImplicitReturnZero(Record.readInt()); FD->setConstexprKind(static_cast(Record.readInt())); FD->setUsesSEHTry(Record.readInt()); FD->setHasSkippedBody(Record.readInt()); FD->setIsMultiVersion(Record.readInt()); FD->setLateTemplateParsed(Record.readInt()); FD->setCachedLinkage(static_cast(Record.readInt())); FD->EndRangeLoc = readSourceLocation(); FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); FD->setUsesFPIntrin(Record.readInt()); if (FD->isDefaulted()) { if (unsigned NumLookups = Record.readInt()) { SmallVector Lookups; for (unsigned I = 0; I != NumLookups; ++I) { NamedDecl *ND = Record.readDeclAs(); AccessSpecifier AS = (AccessSpecifier)Record.readInt(); Lookups.push_back(DeclAccessPair::make(ND, AS)); } FD->setDefaultedFunctionInfo(FunctionDecl::DefaultedFunctionInfo::Create( Reader.getContext(), Lookups)); } } switch ((FunctionDecl::TemplatedKind)Record.readInt()) { case FunctionDecl::TK_NonTemplate: mergeRedeclarable(FD, Redecl); break; case FunctionDecl::TK_FunctionTemplate: // Merged when we merge the template. FD->setDescribedFunctionTemplate(readDeclAs()); break; case FunctionDecl::TK_MemberSpecialization: { auto *InstFD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK); FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI); mergeRedeclarable(FD, Redecl); break; } case FunctionDecl::TK_FunctionTemplateSpecialization: { auto *Template = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); // Template arguments. SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); // Template args as written. SmallVector TemplArgLocs; SourceLocation LAngleLoc, RAngleLoc; bool HasTemplateArgumentsAsWritten = Record.readInt(); if (HasTemplateArgumentsAsWritten) { unsigned NumTemplateArgLocs = Record.readInt(); TemplArgLocs.reserve(NumTemplateArgLocs); for (unsigned i = 0; i != NumTemplateArgLocs; ++i) TemplArgLocs.push_back(Record.readTemplateArgumentLoc()); LAngleLoc = readSourceLocation(); RAngleLoc = readSourceLocation(); } SourceLocation POI = readSourceLocation(); ASTContext &C = Reader.getContext(); TemplateArgumentList *TemplArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc); for (unsigned i = 0, e = TemplArgLocs.size(); i != e; ++i) TemplArgsInfo.addArgument(TemplArgLocs[i]); MemberSpecializationInfo *MSInfo = nullptr; if (Record.readInt()) { auto *FD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MSInfo = new (C) MemberSpecializationInfo(FD, TSK); MSInfo->setPointOfInstantiation(POI); } FunctionTemplateSpecializationInfo *FTInfo = FunctionTemplateSpecializationInfo::Create( C, FD, Template, TSK, TemplArgList, HasTemplateArgumentsAsWritten ? 
&TemplArgsInfo : nullptr, POI, MSInfo); FD->TemplateOrSpecialization = FTInfo; if (FD->isCanonicalDecl()) { // if canonical add to template's set. // The template that contains the specializations set. It's not safe to // use getCanonicalDecl on Template since it may still be initializing. auto *CanonTemplate = readDeclAs(); // Get the InsertPos by FindNodeOrInsertPos() instead of calling // InsertNode(FTInfo) directly to avoid the getASTContext() call in // FunctionTemplateSpecializationInfo's Profile(). // We avoid getASTContext because a decl in the parent hierarchy may // be initializing. llvm::FoldingSetNodeID ID; FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C); void *InsertPos = nullptr; FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr(); FunctionTemplateSpecializationInfo *ExistingInfo = CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos); if (InsertPos) CommonPtr->Specializations.InsertNode(FTInfo, InsertPos); else { assert(Reader.getContext().getLangOpts().Modules && "already deserialized this template specialization"); mergeRedeclarable(FD, ExistingInfo->getFunction(), Redecl); } } break; } case FunctionDecl::TK_DependentFunctionTemplateSpecialization: { // Templates. UnresolvedSet<8> TemplDecls; unsigned NumTemplates = Record.readInt(); while (NumTemplates--) TemplDecls.addDecl(readDeclAs()); // Templates args. TemplateArgumentListInfo TemplArgs; unsigned NumArgs = Record.readInt(); while (NumArgs--) TemplArgs.addArgument(Record.readTemplateArgumentLoc()); TemplArgs.setLAngleLoc(readSourceLocation()); TemplArgs.setRAngleLoc(readSourceLocation()); FD->setDependentTemplateSpecialization(Reader.getContext(), TemplDecls, TemplArgs); // These are not merged; we don't need to merge redeclarations of dependent // template friends. break; } } // Read in the parameters. unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); FD->setParams(Reader.getContext(), Params); } void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { VisitNamedDecl(MD); if (Record.readInt()) { // Load the body on-demand. Most clients won't care, because method // definitions rarely show up in headers. 
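// Illustrative sketch, not part of the patch: the on-demand body loading that
// Reader.PendingBodies implements -- record only where the body lives now and
// deserialize it the first time it is requested. Standalone toy with
// hypothetical names; a std::string stands in for the bitstream.
#include <cstdio>
#include <map>
#include <string>

struct FakeDecl { const char *Name; };

static std::map<const FakeDecl *, size_t> PendingBodies;  // decl -> bit offset
static const std::string Stream = "....{ return 42; }";   // fake serialized data

static void noteBodyAt(const FakeDecl &D, size_t Offset) {
  PendingBodies[&D] = Offset;             // cheap: nothing is parsed yet
}

static std::string loadBody(const FakeDecl &D) {
  auto It = PendingBodies.find(&D);
  if (It == PendingBodies.end())
    return "<no pending body>";
  std::string Body = Stream.substr(It->second);  // "deserialize" on first use
  PendingBodies.erase(It);
  return Body;
}

int main() {
  FakeDecl M{"-[Foo bar]"};
  noteBodyAt(M, 4);
  std::printf("%s: %s\n", M.Name, loadBody(M).c_str());
}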
Reader.PendingBodies[MD] = GetCurrentCursorOffset(); HasPendingBody = true; } MD->setSelfDecl(readDeclAs()); MD->setCmdDecl(readDeclAs()); MD->setInstanceMethod(Record.readInt()); MD->setVariadic(Record.readInt()); MD->setPropertyAccessor(Record.readInt()); MD->setSynthesizedAccessorStub(Record.readInt()); MD->setDefined(Record.readInt()); MD->setOverriding(Record.readInt()); MD->setHasSkippedBody(Record.readInt()); MD->setIsRedeclaration(Record.readInt()); MD->setHasRedeclaration(Record.readInt()); if (MD->hasRedeclaration()) Reader.getContext().setObjCMethodRedeclaration(MD, readDeclAs()); MD->setDeclImplementation((ObjCMethodDecl::ImplementationControl)Record.readInt()); MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt()); MD->setRelatedResultType(Record.readInt()); MD->setReturnType(Record.readType()); MD->setReturnTypeSourceInfo(readTypeSourceInfo()); MD->DeclEndLoc = readSourceLocation(); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); MD->setSelLocsKind((SelectorLocationsKind)Record.readInt()); unsigned NumStoredSelLocs = Record.readInt(); SmallVector SelLocs; SelLocs.reserve(NumStoredSelLocs); for (unsigned i = 0; i != NumStoredSelLocs; ++i) SelLocs.push_back(readSourceLocation()); MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs); } void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { VisitTypedefNameDecl(D); D->Variance = Record.readInt(); D->Index = Record.readInt(); D->VarianceLoc = readSourceLocation(); D->ColonLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) { VisitNamedDecl(CD); CD->setAtStartLoc(readSourceLocation()); CD->setAtEndRange(readSourceRange()); } ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() { unsigned numParams = Record.readInt(); if (numParams == 0) return nullptr; SmallVector typeParams; typeParams.reserve(numParams); for (unsigned i = 0; i != numParams; ++i) { auto *typeParam = readDeclAs(); if (!typeParam) return nullptr; typeParams.push_back(typeParam); } SourceLocation lAngleLoc = readSourceLocation(); SourceLocation rAngleLoc = readSourceLocation(); return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc, typeParams, rAngleLoc); } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCInterfaceDecl::DefinitionData &Data) { // Read the superclass. Data.SuperClassTInfo = readTypeSourceInfo(); Data.EndLoc = readSourceLocation(); Data.HasDesignatedInitializers = Record.readInt(); // Read the directly referenced protocols and their SourceLocations. unsigned NumProtocols = Record.readInt(); SmallVector Protocols; Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(), Reader.getContext()); // Read the transitive closure of protocols referenced by this class. NumProtocols = Record.readInt(); Protocols.clear(); Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols, Reader.getContext()); } void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD) { // FIXME: odr checking? 
} void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) { RedeclarableResult Redecl = VisitRedeclarable(ID); VisitObjCContainerDecl(ID); DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); mergeRedeclarable(ID, Redecl); ID->TypeParamList = ReadObjCTypeParamList(); if (Record.readInt()) { // Read the definition. ID->allocateDefinitionData(); ReadObjCDefinitionData(ID->data()); ObjCInterfaceDecl *Canon = ID->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(ID->data())); ID->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. ID->getCanonicalDecl()->Data = ID->Data; // We will rebuild this list lazily. ID->setIvarList(nullptr); } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(ID); // Note that we've loaded this Objective-C class. Reader.ObjCClassesLoaded.push_back(ID); } else { ID->Data = ID->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) { VisitFieldDecl(IVD); IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt()); // This field will be built lazily. IVD->setNextIvar(nullptr); bool synth = Record.readInt(); IVD->setSynthesize(synth); } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCProtocolDecl::DefinitionData &Data) { unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); } void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD) { // FIXME: odr checking? } void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) { RedeclarableResult Redecl = VisitRedeclarable(PD); VisitObjCContainerDecl(PD); mergeRedeclarable(PD, Redecl); if (Record.readInt()) { // Read the definition. PD->allocateDefinitionData(); ReadObjCDefinitionData(PD->data()); ObjCProtocolDecl *Canon = PD->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(PD->data())); PD->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. PD->getCanonicalDecl()->Data = PD->Data; } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(PD); } else { PD->Data = PD->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) { VisitFieldDecl(FD); } void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) { VisitObjCContainerDecl(CD); CD->setCategoryNameLoc(readSourceLocation()); CD->setIvarLBraceLoc(readSourceLocation()); CD->setIvarRBraceLoc(readSourceLocation()); // Note that this category has been deserialized. We do this before // deserializing the interface declaration, so that it will consider this /// category. 
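// Illustrative sketch, not part of the patch: the "definition data hangs off
// the canonical decl" invariant used for ObjC interfaces and protocols above.
// A redeclaration either publishes its freshly read definition through the
// canonical decl or adopts the one already there. Hypothetical names;
// shared_ptr stands in for the shared DefinitionData pointer.
#include <cstdio>
#include <memory>

struct DefinitionData { int NumIvars; };

struct InterfaceDecl {
  InterfaceDecl *Canonical = this;            // canonical decl of the chain
  std::shared_ptr<DefinitionData> Data;
};

static void attachDefinition(InterfaceDecl &D,
                             std::shared_ptr<DefinitionData> Fresh) {
  if (D.Canonical->Data) {
    // A definition already exists: keep it (and, conceptually, merge into it).
    D.Data = D.Canonical->Data;
  } else {
    // First definition: publish it so later redeclarations can see it.
    D.Canonical->Data = Fresh;
    D.Data = Fresh;
  }
}

int main() {
  InterfaceDecl First, Redecl;
  Redecl.Canonical = &First;
  attachDefinition(First, std::make_shared<DefinitionData>(DefinitionData{3}));
  attachDefinition(Redecl, std::make_shared<DefinitionData>(DefinitionData{99}));
  std::printf("ivars seen through the redecl: %d\n", Redecl.Data->NumIvars); // 3
}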
Reader.CategoriesDeserialized.insert(CD); CD->ClassInterface = readDeclAs(); CD->TypeParamList = ReadObjCTypeParamList(); unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); // Protocols in the class extension belong to the class. if (NumProtoRefs > 0 && CD->ClassInterface && CD->IsClassExtension()) CD->ClassInterface->mergeClassExtensionProtocolList( (ObjCProtocolDecl *const *)ProtoRefs.data(), NumProtoRefs, Reader.getContext()); } void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) { VisitNamedDecl(CAD); CAD->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) { VisitNamedDecl(D); D->setAtLoc(readSourceLocation()); D->setLParenLoc(readSourceLocation()); QualType T = Record.readType(); TypeSourceInfo *TSI = readTypeSourceInfo(); D->setType(T, TSI); D->setPropertyAttributes((ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyAttributesAsWritten( (ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyImplementation( (ObjCPropertyDecl::PropertyControl)Record.readInt()); DeclarationName GetterName = Record.readDeclarationName(); SourceLocation GetterLoc = readSourceLocation(); D->setGetterName(GetterName.getObjCSelector(), GetterLoc); DeclarationName SetterName = Record.readDeclarationName(); SourceLocation SetterLoc = readSourceLocation(); D->setSetterName(SetterName.getObjCSelector(), SetterLoc); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setPropertyIvarDecl(readDeclAs()); } void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) { VisitObjCContainerDecl(D); D->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) { VisitObjCImplDecl(D); D->CategoryNameLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) { VisitObjCImplDecl(D); D->setSuperClass(readDeclAs()); D->SuperLoc = readSourceLocation(); D->setIvarLBraceLoc(readSourceLocation()); D->setIvarRBraceLoc(readSourceLocation()); D->setHasNonZeroConstructors(Record.readInt()); D->setHasDestructors(Record.readInt()); D->NumIvarInitializers = Record.readInt(); if (D->NumIvarInitializers) D->IvarInitializers = ReadGlobalOffset(); } void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) { VisitDecl(D); D->setAtLoc(readSourceLocation()); D->setPropertyDecl(readDeclAs()); D->PropertyIvarDecl = readDeclAs(); D->IvarLoc = readSourceLocation(); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setGetterCXXConstructor(Record.readExpr()); D->setSetterCXXAssignment(Record.readExpr()); } void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) { VisitDeclaratorDecl(FD); FD->Mutable = Record.readInt(); if (auto ISK = static_cast(Record.readInt())) { FD->InitStorage.setInt(ISK); FD->InitStorage.setPointer(ISK == FieldDecl::ISK_CapturedVLAType ? 
Record.readType().getAsOpaquePtr() : Record.readExpr()); } if (auto *BW = Record.readExpr()) FD->setBitWidth(BW); if (!FD->getDeclName()) { if (auto *Tmpl = readDeclAs()) Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl); } mergeMergeable(FD); } void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) { VisitDeclaratorDecl(PD); PD->GetterId = Record.readIdentifier(); PD->SetterId = Record.readIdentifier(); } void ASTDeclReader::VisitMSGuidDecl(MSGuidDecl *D) { VisitValueDecl(D); D->PartVal.Part1 = Record.readInt(); D->PartVal.Part2 = Record.readInt(); D->PartVal.Part3 = Record.readInt(); for (auto &C : D->PartVal.Part4And5) C = Record.readInt(); // Add this GUID to the AST context's lookup structure, and merge if needed. if (MSGuidDecl *Existing = Reader.getContext().MSGuidDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) { VisitValueDecl(FD); FD->ChainingSize = Record.readInt(); assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2"); FD->Chaining = new (Reader.getContext())NamedDecl*[FD->ChainingSize]; for (unsigned I = 0; I != FD->ChainingSize; ++I) FD->Chaining[I] = readDeclAs(); mergeMergeable(FD); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) { RedeclarableResult Redecl = VisitRedeclarable(VD); VisitDeclaratorDecl(VD); VD->VarDeclBits.SClass = (StorageClass)Record.readInt(); VD->VarDeclBits.TSCSpec = Record.readInt(); VD->VarDeclBits.InitStyle = Record.readInt(); VD->VarDeclBits.ARCPseudoStrong = Record.readInt(); if (!isa(VD)) { VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition = Record.readInt(); VD->NonParmVarDeclBits.ExceptionVar = Record.readInt(); VD->NonParmVarDeclBits.NRVOVariable = Record.readInt(); VD->NonParmVarDeclBits.CXXForRangeDecl = Record.readInt(); VD->NonParmVarDeclBits.ObjCForDecl = Record.readInt(); VD->NonParmVarDeclBits.IsInline = Record.readInt(); VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt(); VD->NonParmVarDeclBits.IsConstexpr = Record.readInt(); VD->NonParmVarDeclBits.IsInitCapture = Record.readInt(); VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record.readInt(); VD->NonParmVarDeclBits.ImplicitParamKind = Record.readInt(); VD->NonParmVarDeclBits.EscapingByref = Record.readInt(); } auto VarLinkage = Linkage(Record.readInt()); VD->setCachedLinkage(VarLinkage); // Reconstruct the one piece of the IdentifierNamespace that we need. 
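// Illustrative sketch, not part of the patch: the GetOrInsertNode-style
// deduplication applied to MSGuidDecl above -- look the value up in a table,
// and if an equivalent node already exists, merge the new decl into that
// canonical one. Hypothetical names; std::map stands in for the folding set.
#include <array>
#include <cstdio>
#include <map>

struct GuidDecl {
  std::array<unsigned, 4> Value;
  const GuidDecl *MergedInto;   // canonical decl, set when deduplicated
};

static std::map<std::array<unsigned, 4>, GuidDecl *> GuidTable;

static void getOrInsert(GuidDecl &D) {
  auto Result = GuidTable.insert({D.Value, &D});
  if (!Result.second)
    D.MergedInto = Result.first->second;  // an equivalent GUID already exists
}

int main() {
  GuidDecl A{{1, 2, 3, 4}, nullptr}, B{{1, 2, 3, 4}, nullptr};
  getOrInsert(A);
  getOrInsert(B);
  std::printf("B merged into A: %d\n", (int)(B.MergedInto == &A));  // 1
}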
if (VD->getStorageClass() == SC_Extern && VarLinkage != NoLinkage && VD->getLexicalDeclContext()->isFunctionOrMethod()) VD->setLocalExternDecl(); if (uint64_t Val = Record.readInt()) { VD->setInit(Record.readExpr()); if (Val > 1) { EvaluatedStmt *Eval = VD->ensureEvaluatedStmt(); Eval->CheckedICE = true; Eval->IsICE = (Val & 1) != 0; Eval->HasConstantDestruction = (Val & 4) != 0; } } if (VD->hasAttr() && VD->getType()->getAsCXXRecordDecl()) { Expr *CopyExpr = Record.readExpr(); if (CopyExpr) Reader.getContext().setBlockVarCopyInit(VD, CopyExpr, Record.readInt()); } if (VD->getStorageDuration() == SD_Static && Record.readInt()) { Reader.DefinitionSource[VD] = Loc.F->Kind == ModuleKind::MK_MainFile; if (Reader.getContext().getLangOpts().BuildingPCHWithObjectFile && Reader.DeclIsFromPCHWithObjectFile(VD)) Reader.DefinitionSource[VD] = true; } enum VarKind { VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization }; switch ((VarKind)Record.readInt()) { case VarNotTemplate: // Only true variables (not parameters or implicit parameters) can be // merged; the other kinds are not really redeclarable at all. if (!isa(VD) && !isa(VD) && !isa(VD)) mergeRedeclarable(VD, Redecl); break; case VarTemplate: // Merged when we merge the template. VD->setDescribedVarTemplate(readDeclAs()); break; case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo. auto *Tmpl = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK,POI); mergeRedeclarable(VD, Redecl); break; } } return Redecl; } void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) { VisitVarDecl(PD); } void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) { VisitVarDecl(PD); unsigned isObjCMethodParam = Record.readInt(); unsigned scopeDepth = Record.readInt(); unsigned scopeIndex = Record.readInt(); unsigned declQualifier = Record.readInt(); if (isObjCMethodParam) { assert(scopeDepth == 0); PD->setObjCMethodScopeInfo(scopeIndex); PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier; } else { PD->setScopeInfo(scopeDepth, scopeIndex); } PD->ParmVarDeclBits.IsKNRPromoted = Record.readInt(); PD->ParmVarDeclBits.HasInheritedDefaultArg = Record.readInt(); if (Record.readInt()) // hasUninstantiatedDefaultArg. PD->setUninstantiatedDefaultArg(Record.readExpr()); // FIXME: If this is a redeclaration of a function from another module, handle // inheritance of default arguments. 
} void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) { VisitVarDecl(DD); auto **BDs = DD->getTrailingObjects(); for (unsigned I = 0; I != DD->NumBindings; ++I) { BDs[I] = readDeclAs(); BDs[I]->setDecomposedDecl(DD); } } void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) { VisitValueDecl(BD); BD->Binding = Record.readExpr(); } void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) { VisitDecl(AD); AD->setAsmString(cast(Record.readExpr())); AD->setRParenLoc(readSourceLocation()); } void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) { VisitDecl(BD); BD->setBody(cast_or_null(Record.readStmt())); BD->setSignatureAsWritten(readTypeSourceInfo()); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); BD->setParams(Params); BD->setIsVariadic(Record.readInt()); BD->setBlockMissingReturnType(Record.readInt()); BD->setIsConversionFromLambda(Record.readInt()); BD->setDoesNotEscape(Record.readInt()); BD->setCanAvoidCopyToHeap(Record.readInt()); bool capturesCXXThis = Record.readInt(); unsigned numCaptures = Record.readInt(); SmallVector captures; captures.reserve(numCaptures); for (unsigned i = 0; i != numCaptures; ++i) { auto *decl = readDeclAs(); unsigned flags = Record.readInt(); bool byRef = (flags & 1); bool nested = (flags & 2); Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr); captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr)); } BD->setCaptures(Reader.getContext(), captures, capturesCXXThis); } void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) { VisitDecl(CD); unsigned ContextParamPos = Record.readInt(); CD->setNothrow(Record.readInt() != 0); // Body is set by VisitCapturedStmt. for (unsigned I = 0; I < CD->NumParams; ++I) { if (I != ContextParamPos) CD->setParam(I, readDeclAs()); else CD->setContextParam(I, readDeclAs()); } } void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) { VisitDecl(D); D->setLanguage((LinkageSpecDecl::LanguageIDs)Record.readInt()); D->setExternLoc(readSourceLocation()); D->setRBraceLoc(readSourceLocation()); } void ASTDeclReader::VisitExportDecl(ExportDecl *D) { VisitDecl(D); D->RBraceLoc = readSourceLocation(); } void ASTDeclReader::VisitLabelDecl(LabelDecl *D) { VisitNamedDecl(D); D->setLocStart(readSourceLocation()); } void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->setInline(Record.readInt()); D->LocStart = readSourceLocation(); D->RBraceLoc = readSourceLocation(); // Defer loading the anonymous namespace until we've finished merging // this namespace; loading it might load a later declaration of the // same namespace, and we have an invariant that older declarations // get merged before newer ones try to merge. GlobalDeclID AnonNamespace = 0; if (Redecl.getFirstID() == ThisDeclID) { AnonNamespace = readDeclID(); } else { // Link this namespace back to the first declaration, which has already // been deserialized. D->AnonOrFirstNamespaceAndInline.setPointer(D->getFirstDecl()); } mergeRedeclarable(D, Redecl); if (AnonNamespace) { // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. 
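// Illustrative sketch, not part of the patch: decoding the packed capture
// flags read in VisitBlockDecl above (bit 0 = captured by reference,
// bit 1 = nested capture, bit 2 = "a copy expression follows in the record").
// Standalone model with hypothetical names.
#include <cstdio>

struct DecodedCapture {
  bool ByRef;
  bool Nested;
  bool HasCopyExpr;   // in the real reader this gates Record.readExpr()
};

static DecodedCapture decodeCaptureFlags(unsigned Flags) {
  return {(Flags & 1) != 0, (Flags & 2) != 0, (Flags & 4) != 0};
}

int main() {
  DecodedCapture C = decodeCaptureFlags(/*Flags=*/5);  // by-ref + copy expr
  std::printf("byref=%d nested=%d copy=%d\n",
              (int)C.ByRef, (int)C.Nested, (int)C.HasCopyExpr);
}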
auto *Anon = cast(Reader.GetDecl(AnonNamespace)); if (!Record.isModule()) D->setAnonymousNamespace(Anon); } } void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->NamespaceLoc = readSourceLocation(); D->IdentLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->Namespace = readDeclAs(); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitUsingDecl(UsingDecl *D) { VisitNamedDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->FirstUsingShadow.setPointer(readDeclAs()); D->setTypename(Record.readInt()); if (auto *Pattern = readDeclAs()) Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern); mergeMergeable(D); } void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) { VisitNamedDecl(D); D->InstantiatedFrom = readDeclAs(); auto **Expansions = D->getTrailingObjects(); for (unsigned I = 0; I != D->NumExpansions; ++I) Expansions[I] = readDeclAs(); mergeMergeable(D); } void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->Underlying = readDeclAs(); D->IdentifierNamespace = Record.readInt(); D->UsingOrNextShadow = readDeclAs(); auto *Pattern = readDeclAs(); if (Pattern) Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { VisitUsingShadowDecl(D); D->NominatedBaseClassShadowDecl = readDeclAs(); D->ConstructedBaseClassShadowDecl = readDeclAs(); D->IsVirtual = Record.readInt(); } void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { VisitNamedDecl(D); D->UsingLoc = readSourceLocation(); D->NamespaceLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->NominatedNamespace = readDeclAs(); D->CommonAncestor = readDeclAs(); } void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) { VisitValueDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { VisitTypeDecl(D); D->TypenameLocation = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::ReadCXXDefinitionData( struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D) { #define FIELD(Name, Width, Merge) \ Data.Name = Record.readInt(); #include "clang/AST/CXXRecordDeclDefinitionBits.def" // Note: the caller has deserialized the IsLambda bit already. 
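// Illustrative sketch, not part of the patch: the X-macro pattern used with
// CXXRecordDeclDefinitionBits.def above -- one field list expanded once to
// declare the bits and once to read them back. The real .def also carries a
// width and a merge policy per field; this toy keeps a single-argument FIELD
// and an inline, hypothetical field list.
#include <cstdio>

#define DEFINITION_BITS(FIELD) \
  FIELD(IsAggregate)           \
  FIELD(IsPOD)                 \
  FIELD(HasUserDeclaredConstructor)

struct DefinitionBits {
#define DECLARE(Name) unsigned Name : 1;
  DEFINITION_BITS(DECLARE)
#undef DECLARE
};

// Stand-in for Record.readInt(): pull one value per field from an array.
static DefinitionBits readBits(const unsigned *Record) {
  DefinitionBits Data{};
#define READ(Name) Data.Name = *Record++;
  DEFINITION_BITS(READ)
#undef READ
  return Data;
}

int main() {
  unsigned Serialized[] = {1, 0, 1};
  DefinitionBits B = readBits(Serialized);
  std::printf("aggregate=%u pod=%u userctor=%u\n",
              B.IsAggregate, B.IsPOD, B.HasUserDeclaredConstructor);
}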
Data.ODRHash = Record.readInt(); Data.HasODRHash = true; if (Record.readInt()) { Reader.DefinitionSource[D] = Loc.F->Kind == ModuleKind::MK_MainFile; if (Reader.getContext().getLangOpts().BuildingPCHWithObjectFile && Reader.DeclIsFromPCHWithObjectFile(D)) Reader.DefinitionSource[D] = true; } Data.NumBases = Record.readInt(); if (Data.NumBases) Data.Bases = ReadGlobalOffset(); Data.NumVBases = Record.readInt(); if (Data.NumVBases) Data.VBases = ReadGlobalOffset(); Record.readUnresolvedSet(Data.Conversions); Data.ComputedVisibleConversions = Record.readInt(); if (Data.ComputedVisibleConversions) Record.readUnresolvedSet(Data.VisibleConversions); assert(Data.Definition && "Data.Definition should be already set!"); Data.FirstFriend = readDeclID(); if (Data.IsLambda) { using Capture = LambdaCapture; auto &Lambda = static_cast(Data); Lambda.Dependent = Record.readInt(); Lambda.IsGenericLambda = Record.readInt(); Lambda.CaptureDefault = Record.readInt(); Lambda.NumCaptures = Record.readInt(); Lambda.NumExplicitCaptures = Record.readInt(); Lambda.HasKnownInternalLinkage = Record.readInt(); Lambda.ManglingNumber = Record.readInt(); Lambda.ContextDecl = readDeclID(); Lambda.Captures = (Capture *)Reader.getContext().Allocate( sizeof(Capture) * Lambda.NumCaptures); Capture *ToCapture = Lambda.Captures; Lambda.MethodTyInfo = readTypeSourceInfo(); for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) { SourceLocation Loc = readSourceLocation(); bool IsImplicit = Record.readInt(); auto Kind = static_cast(Record.readInt()); switch (Kind) { case LCK_StarThis: case LCK_This: case LCK_VLAType: *ToCapture++ = Capture(Loc, IsImplicit, Kind, nullptr,SourceLocation()); break; case LCK_ByCopy: case LCK_ByRef: auto *Var = readDeclAs(); SourceLocation EllipsisLoc = readSourceLocation(); *ToCapture++ = Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc); break; } } } } void ASTDeclReader::MergeDefinitionData( CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) { assert(D->DefinitionData && "merging class definition into non-definition"); auto &DD = *D->DefinitionData; if (DD.Definition != MergeDD.Definition) { // Track that we merged the definitions. Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition, DD.Definition)); Reader.PendingDefinitions.erase(MergeDD.Definition); MergeDD.Definition->setCompleteDefinition(false); Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition); assert(Reader.Lookups.find(MergeDD.Definition) == Reader.Lookups.end() && "already loaded pending lookups for merged definition"); } auto PFDI = Reader.PendingFakeDefinitionData.find(&DD); if (PFDI != Reader.PendingFakeDefinitionData.end() && PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) { // We faked up this definition data because we found a class for which we'd // not yet loaded the definition. Replace it with the real thing now. assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?"); PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded; // Don't change which declaration is the definition; that is required // to be invariant once we select it. 
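// ---- Illustrative sketch (standalone, not part of this file) --------------
// The lambda-capture loop above reads a kind tag first and only reads the
// extra payload (the captured variable and its ellipsis location) for the
// by-copy / by-reference kinds.  The record layout and names here are
// hypothetical; they only model "the tag decides how much of the stream
// belongs to this entry".
#include <cstdint>
#include <vector>

namespace lambda_capture_sketch {

enum class CaptureKind : uint8_t { This, StarThis, VLAType, ByCopy, ByRef };

struct CaptureEntry {
  CaptureKind Kind;
  uint32_t VarID = 0; // only meaningful for ByCopy / ByRef
};

inline CaptureEntry readCapture(const std::vector<uint32_t> &Record,
                                size_t &Idx) {
  CaptureEntry E{static_cast<CaptureKind>(Record[Idx++])};
  switch (E.Kind) {
  case CaptureKind::This:
  case CaptureKind::StarThis:
  case CaptureKind::VLAType:
    break; // no payload beyond the tag
  case CaptureKind::ByCopy:
  case CaptureKind::ByRef:
    E.VarID = Record[Idx++]; // payload: the captured variable
    break;
  }
  return E;
}

} // namespace lambda_capture_sketch
// ---- end sketch ------------------------------------------------------------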
auto *Def = DD.Definition; DD = std::move(MergeDD); DD.Definition = Def; return; } bool DetectedOdrViolation = false; #define FIELD(Name, Width, Merge) Merge(Name) #define MERGE_OR(Field) DD.Field |= MergeDD.Field; #define NO_MERGE(Field) \ DetectedOdrViolation |= DD.Field != MergeDD.Field; \ MERGE_OR(Field) #include "clang/AST/CXXRecordDeclDefinitionBits.def" NO_MERGE(IsLambda) #undef NO_MERGE #undef MERGE_OR if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases) DetectedOdrViolation = true; // FIXME: Issue a diagnostic if the base classes don't match when we come // to lazily load them. // FIXME: Issue a diagnostic if the list of conversion functions doesn't // match when we come to lazily load them. if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) { DD.VisibleConversions = std::move(MergeDD.VisibleConversions); DD.ComputedVisibleConversions = true; } // FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to // lazily load it. if (DD.IsLambda) { // FIXME: ODR-checking for merging lambdas (this happens, for instance, // when they occur within the body of a function template specialization). } if (D->getODRHash() != MergeDD.ODRHash) { DetectedOdrViolation = true; } if (DetectedOdrViolation) Reader.PendingOdrMergeFailures[DD.Definition].push_back( {MergeDD.Definition, &MergeDD}); } void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) { struct CXXRecordDecl::DefinitionData *DD; ASTContext &C = Reader.getContext(); // Determine whether this is a lambda closure type, so that we can // allocate the appropriate DefinitionData structure. bool IsLambda = Record.readInt(); if (IsLambda) DD = new (C) CXXRecordDecl::LambdaDefinitionData(D, nullptr, false, false, LCD_None); else DD = new (C) struct CXXRecordDecl::DefinitionData(D); CXXRecordDecl *Canon = D->getCanonicalDecl(); // Set decl definition data before reading it, so that during deserialization // when we read CXXRecordDecl, it already has definition data and we don't // set fake one. if (!Canon->DefinitionData) Canon->DefinitionData = DD; D->DefinitionData = Canon->DefinitionData; ReadCXXDefinitionData(*DD, D); // We might already have a different definition for this record. This can // happen either because we're reading an update record, or because we've // already done some merging. Either way, just merge into it. if (Canon->DefinitionData != DD) { MergeDefinitionData(Canon, std::move(*DD)); return; } // Mark this declaration as being a definition. D->setCompleteDefinition(true); // If this is not the first declaration or is an update record, we can have // other redeclarations already. Make a note that we need to propagate the // DefinitionData pointer onto them. if (Update || Canon != D) Reader.PendingDefinitions.insert(D); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) { RedeclarableResult Redecl = VisitRecordDeclImpl(D); ASTContext &C = Reader.getContext(); enum CXXRecKind { CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization }; switch ((CXXRecKind)Record.readInt()) { case CXXRecNotTemplate: // Merged when we merge the folding set entry in the primary template. if (!isa(D)) mergeRedeclarable(D, Redecl); break; case CXXRecTemplate: { // Merged when we merge the template. auto *Template = readDeclAs(); D->TemplateOrInstantiation = Template; if (!Template->getTemplatedDecl()) { // We've not actually loaded the ClassTemplateDecl yet, because we're // currently being loaded as its pattern. 
Rely on it to set up our // TypeForDecl (see VisitClassTemplateDecl). // // Beware: we do not yet know our canonical declaration, and may still // get merged once the surrounding class template has got off the ground. DeferredTypeID = 0; } break; } case CXXRecMemberSpecialization: { auto *RD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK); MSI->setPointOfInstantiation(POI); D->TemplateOrInstantiation = MSI; mergeRedeclarable(D, Redecl); break; } } bool WasDefinition = Record.readInt(); if (WasDefinition) ReadCXXRecordDefinition(D, /*Update*/false); else // Propagate DefinitionData pointer from the canonical declaration. D->DefinitionData = D->getCanonicalDecl()->DefinitionData; // Lazily load the key function to avoid deserializing every method so we can // compute it. if (WasDefinition) { DeclID KeyFn = readDeclID(); if (KeyFn && D->isCompleteDefinition()) // FIXME: This is wrong for the ARM ABI, where some other module may have // made this function no longer be a key function. We need an update // record or similar for that case. C.KeyFunctions[D] = KeyFn; } return Redecl; } void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); VisitFunctionDecl(D); D->setIsCopyDeductionCandidate(Record.readInt()); } void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) { VisitFunctionDecl(D); unsigned NumOverridenMethods = Record.readInt(); if (D->isCanonicalDecl()) { while (NumOverridenMethods--) { // Avoid invariant checking of CXXMethodDecl::addOverriddenMethod, // MD may be initializing. if (auto *MD = readDeclAs()) Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl()); } } else { // We don't care about which declarations this used to override; we get // the relevant information from the canonical declaration. Record.skipInts(NumOverridenMethods); } } void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) { // We need the inherited constructor information to merge the declaration, // so we have to read it before we call VisitCXXMethodDecl. D->setExplicitSpecifier(Record.readExplicitSpec()); if (D->isInheritingConstructor()) { auto *Shadow = readDeclAs(); auto *Ctor = readDeclAs(); *D->getTrailingObjects() = InheritedConstructor(Shadow, Ctor); } VisitCXXMethodDecl(D); } void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) { VisitCXXMethodDecl(D); if (auto *OperatorDelete = readDeclAs()) { CXXDestructorDecl *Canon = D->getCanonicalDecl(); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!Canon->OperatorDelete) { Canon->OperatorDelete = OperatorDelete; Canon->OperatorDeleteThisArg = ThisArg; } } } void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); VisitCXXMethodDecl(D); } void ASTDeclReader::VisitImportDecl(ImportDecl *D) { VisitDecl(D); D->ImportedModule = readModule(); D->setImportComplete(Record.readInt()); auto *StoredLocs = D->getTrailingObjects(); for (unsigned I = 0, N = Record.back(); I != N; ++I) StoredLocs[I] = readSourceLocation(); Record.skipInts(1); // The number of stored source locations. 
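// ---- Illustrative sketch (standalone, not part of this file) --------------
// MergeDefinitionData above combines two copies of a class definition read
// from different modules: "mergeable" bits are OR-ed together (MERGE_OR),
// while bits that must agree raise an ODR-violation flag when they differ
// (NO_MERGE).  The two-field struct below is a hypothetical miniature of that
// scheme, not the real DefinitionData.
namespace merge_bits_sketch {

struct DefBits {
  bool HasUserDeclaredConstructor = false; // mergeable: OR the copies
  bool IsLambda = false;                   // must agree across copies
};

// Returns true if an ODR violation was detected while merging B into A.
inline bool mergeInto(DefBits &A, const DefBits &B) {
  bool DetectedOdrViolation = false;

  // MERGE_OR-style field: any copy seeing the property is enough.
  A.HasUserDeclaredConstructor |= B.HasUserDeclaredConstructor;

  // NO_MERGE-style field: disagreement is an ODR violation, but the value is
  // still OR-ed so later consumers see a consistent superset.
  DetectedOdrViolation |= A.IsLambda != B.IsLambda;
  A.IsLambda |= B.IsLambda;

  return DetectedOdrViolation;
}

} // namespace merge_bits_sketch
// ---- end sketch ------------------------------------------------------------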
} void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) { VisitDecl(D); D->setColonLoc(readSourceLocation()); } void ASTDeclReader::VisitFriendDecl(FriendDecl *D) { VisitDecl(D); if (Record.readInt()) // hasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); for (unsigned i = 0; i != D->NumTPLists; ++i) D->getTrailingObjects()[i] = Record.readTemplateParameterList(); D->NextFriend = readDeclID(); D->UnsupportedFriend = (Record.readInt() != 0); D->FriendLoc = readSourceLocation(); } void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) { VisitDecl(D); unsigned NumParams = Record.readInt(); D->NumParams = NumParams; D->Params = new TemplateParameterList*[NumParams]; for (unsigned i = 0; i != NumParams; ++i) D->Params[i] = Record.readTemplateParameterList(); if (Record.readInt()) // HasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); D->FriendLoc = readSourceLocation(); } DeclID ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) { VisitNamedDecl(D); DeclID PatternID = readDeclID(); auto *TemplatedDecl = cast_or_null(Reader.GetDecl(PatternID)); TemplateParameterList *TemplateParams = Record.readTemplateParameterList(); D->init(TemplatedDecl, TemplateParams); return PatternID; } void ASTDeclReader::VisitConceptDecl(ConceptDecl *D) { VisitTemplateDecl(D); D->ConstraintExpr = Record.readExpr(); mergeMergeable(D); } void ASTDeclReader::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); // Make sure we've allocated the Common pointer first. We do this before // VisitTemplateDecl so that getCommonPtr() can be used during initialization. RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl(); if (!CanonD->Common) { CanonD->Common = CanonD->newCommon(Reader.getContext()); Reader.PendingDefinitions.insert(CanonD); } D->Common = CanonD->Common; // If this is the first declaration of the template, fill in the information // for the 'common' pointer. if (ThisDeclID == Redecl.getFirstID()) { if (auto *RTD = readDeclAs()) { assert(RTD->getKind() == D->getKind() && "InstantiatedFromMemberTemplate kind mismatch"); D->setInstantiatedFromMemberTemplate(RTD); if (Record.readInt()) D->setMemberSpecialization(); } } DeclID PatternID = VisitTemplateDecl(D); D->IdentifierNamespace = Record.readInt(); mergeRedeclarable(D, Redecl, PatternID); // If we merged the template with a prior declaration chain, merge the common // pointer. // FIXME: Actually merge here, don't just overwrite. D->Common = D->getCanonicalDecl()->Common; return Redecl; } void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); if (ThisDeclID == Redecl.getFirstID()) { // This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } if (D->getTemplatedDecl()->TemplateOrInstantiation) { // We were loaded before our templated declaration was. We've not set up // its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct // it now. 
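// ---- Illustrative sketch (standalone, not part of this file) --------------
// VisitRedeclarableTemplateDecl above keeps one shared "Common" block per
// template entity: it is lazily allocated on the canonical declaration and
// every redeclaration is then pointed at the same block.  Node and
// ensureShared below are hypothetical stand-ins for that arrangement.
#include <memory>

namespace common_ptr_sketch {

struct SharedState {
  int LazySpecializationCount = 0; // stand-in for the real contents
};

struct Node {
  Node *Canonical = this;              // canonical decl of the chain
  std::shared_ptr<SharedState> Common; // owned by the canonical decl
};

inline SharedState &ensureShared(Node &N) {
  Node *Canon = N.Canonical;
  if (!Canon->Common)
    Canon->Common = std::make_shared<SharedState>(); // allocate exactly once
  N.Common = Canon->Common;                          // all redecls share it
  return *N.Common;
}

} // namespace common_ptr_sketch
// ---- end sketch ------------------------------------------------------------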
Reader.getContext().getInjectedClassNameType( D->getTemplatedDecl(), D->getInjectedClassNameSpecialization()); } } void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) { llvm_unreachable("BuiltinTemplates are not serialized"); } /// TODO: Unify with ClassTemplateDecl version? /// May require unifying ClassTemplateDecl and /// VarTemplateDecl beyond TemplateDecl... void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); if (ThisDeclID == Redecl.getFirstID()) { // This VarTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D) { RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D); ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *CTD = dyn_cast(InstD)) { D->SpecializedTemplate = CTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); auto *PS = new (C) ClassTemplateSpecializationDecl:: SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. // Set this as, or find, the canonical declaration for this specialization ClassTemplateSpecializationDecl *CanonSpec; if (auto *Partial = dyn_cast(D)) { CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations .GetOrInsertNode(Partial); } else { CanonSpec = CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } // If there was already a canonical specialization, merge into it. if (CanonSpec != D) { mergeRedeclarable(D, CanonSpec, Redecl); // This declaration might be a definition. Merge with any existing // definition. if (auto *DDD = D->DefinitionData) { if (CanonSpec->DefinitionData) MergeDefinitionData(CanonSpec, std::move(*DDD)); else CanonSpec->DefinitionData = D->DefinitionData; } D->DefinitionData = CanonSpec->DefinitionData; } } } // Explicit info. if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) { auto *ExplicitInfo = new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo; ExplicitInfo->TypeAsWritten = TyInfo; ExplicitInfo->ExternLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } return Redecl; } void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { // We need to read the template params first because redeclarable is going to // need them for profiling TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo(); RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
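// ---- Illustrative sketch (standalone, not part of this file) --------------
// VisitClassTemplateSpecializationDeclImpl above dedupes specializations
// through a folding set keyed by their template arguments: GetOrInsertNode
// either returns the specialization already registered for those arguments or
// inserts the new one.  A map keyed by a hypothetical "profile" string models
// that behaviour; it is not the real FoldingSet API.
#include <string>
#include <unordered_map>

namespace folding_set_sketch {

struct Specialization {
  std::string ArgsProfile; // stand-in for the FoldingSetNodeID of the args
};

class SpecializationSet {
  std::unordered_map<std::string, Specialization *> Nodes;

public:
  // Returns the previously registered node for the same profile, or S itself
  // if this profile was not seen before (mirroring GetOrInsertNode).
  Specialization *getOrInsert(Specialization *S) {
    auto It = Nodes.emplace(S->ArgsProfile, S).first;
    return It->second;
  }
};

// Usage note: when getOrInsert returns something other than S, the caller
// treats the returned node as canonical and merges S into it, which is what
// the code above does via mergeRedeclarable and MergeDefinitionData.

} // namespace folding_set_sketch
// ---- end sketch ------------------------------------------------------------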
if (ThisDeclID == Redecl.getFirstID()) { D->InstantiatedFromMember.setPointer( readDeclAs()); D->InstantiatedFromMember.setInt(Record.readInt()); } } void ASTDeclReader::VisitClassScopeFunctionSpecializationDecl( ClassScopeFunctionSpecializationDecl *D) { VisitDecl(D); D->Specialization = readDeclAs(); if (Record.readInt()) D->TemplateArgs = Record.readASTTemplateArgumentListInfo(); } void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); if (ThisDeclID == Redecl.getFirstID()) { // This FunctionTemplateDecl owns a CommonPtr; read it. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } /// TODO: Unify with ClassTemplateSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarTemplateSpecializationDeclImpl( VarTemplateSpecializationDecl *D) { RedeclarableResult Redecl = VisitVarDeclImpl(D); ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *VTD = dyn_cast(InstD)) { D->SpecializedTemplate = VTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy( C, TemplArgs); auto *PS = new (C) VarTemplateSpecializationDecl::SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } // Explicit info. if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) { auto *ExplicitInfo = new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo; ExplicitInfo->TypeAsWritten = TyInfo; ExplicitInfo->ExternLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); D->IsCompleteDefinition = Record.readInt(); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. // FIXME: If it's already present, merge it. if (auto *Partial = dyn_cast(D)) { CanonPattern->getCommonPtr()->PartialSpecializations .GetOrInsertNode(Partial); } else { CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } } } return Redecl; } /// TODO: Unify with ClassTemplatePartialSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo(); RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
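// ---- Illustrative sketch (standalone, not part of this file) --------------
// The template visitors above do not deserialize their specializations
// eagerly; they read a list of DeclIDs (AddLazySpecializations) and resolve
// each ID to a real declaration only when somebody asks for it.  This is a
// loose model of "store IDs now, materialize later"; LazySpecializations and
// Resolve are hypothetical, with Resolve standing in for GetDecl().
#include <cstdint>
#include <functional>
#include <vector>

namespace lazy_spec_sketch {

using DeclID = uint32_t;

struct LazySpecializations {
  std::vector<DeclID> PendingIDs; // cheap to store at read time

  // Materialize everything on first use.
  template <typename DeclT>
  std::vector<DeclT *> load(const std::function<DeclT *(DeclID)> &Resolve) {
    std::vector<DeclT *> Specs;
    Specs.reserve(PendingIDs.size());
    for (DeclID ID : PendingIDs)
      Specs.push_back(Resolve(ID));
    PendingIDs.clear();
    return Specs;
  }
};

} // namespace lazy_spec_sketch
// ---- end sketch ------------------------------------------------------------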
if (ThisDeclID == Redecl.getFirstID()) { D->InstantiatedFromMember.setPointer( readDeclAs()); D->InstantiatedFromMember.setInt(Record.readInt()); } } void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) { VisitTypeDecl(D); D->setDeclaredWithTypename(Record.readInt()); if (Record.readBool()) { NestedNameSpecifierLoc NNS = Record.readNestedNameSpecifierLoc(); DeclarationNameInfo DN = Record.readDeclarationNameInfo(); ConceptDecl *NamedConcept = Record.readDeclAs(); const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr; if (Record.readBool()) ArgsAsWritten = Record.readASTTemplateArgumentListInfo(); Expr *ImmediatelyDeclaredConstraint = Record.readExpr(); D->setTypeConstraint(NNS, DN, /*FoundDecl=*/nullptr, NamedConcept, ArgsAsWritten, ImmediatelyDeclaredConstraint); if ((D->ExpandedParameterPack = Record.readInt())) D->NumExpanded = Record.readInt(); } if (Record.readInt()) D->setDefaultArgument(readTypeSourceInfo()); } void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) { VisitDeclaratorDecl(D); // TemplateParmPosition. D->setDepth(Record.readInt()); D->setPosition(Record.readInt()); if (D->hasPlaceholderTypeConstraint()) D->setPlaceholderTypeConstraint(Record.readExpr()); if (D->isExpandedParameterPack()) { auto TypesAndInfos = D->getTrailingObjects>(); for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { new (&TypesAndInfos[I].first) QualType(Record.readType()); TypesAndInfos[I].second = readTypeSourceInfo(); } } else { // Rest of NonTypeTemplateParmDecl. D->ParameterPack = Record.readInt(); if (Record.readInt()) D->setDefaultArgument(Record.readExpr()); } } void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) { VisitTemplateDecl(D); // TemplateParmPosition. D->setDepth(Record.readInt()); D->setPosition(Record.readInt()); if (D->isExpandedParameterPack()) { auto **Data = D->getTrailingObjects(); for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) Data[I] = Record.readTemplateParameterList(); } else { // Rest of TemplateTemplateParmDecl. 
    D->ParameterPack = Record.readInt();
    if (Record.readInt())
      D->setDefaultArgument(Reader.getContext(),
                            Record.readTemplateArgumentLoc());
  }
}

void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
  VisitRedeclarableTemplateDecl(D);
}

void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) {
  VisitDecl(D);
  D->AssertExprAndFailed.setPointer(Record.readExpr());
  D->AssertExprAndFailed.setInt(Record.readInt());
  D->Message = cast_or_null<StringLiteral>(Record.readExpr());
  D->RParenLoc = readSourceLocation();
}

void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) {
  VisitDecl(D);
}

void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl(
    LifetimeExtendedTemporaryDecl *D) {
  VisitDecl(D);
  D->ExtendingDecl = readDeclAs<ValueDecl>();
  D->ExprWithTemporary = Record.readStmt();
  if (Record.readInt())
    D->Value = new (D->getASTContext()) APValue(Record.readAPValue());
  D->ManglingNumber = Record.readInt();
  mergeMergeable(D);
}

std::pair<uint64_t, uint64_t>
ASTDeclReader::VisitDeclContext(DeclContext *DC) {
  uint64_t LexicalOffset = ReadLocalOffset();
  uint64_t VisibleOffset = ReadLocalOffset();
  return std::make_pair(LexicalOffset, VisibleOffset);
}

template <typename T>
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarable(Redeclarable<T> *D) {
  DeclID FirstDeclID = readDeclID();
  Decl *MergeWith = nullptr;

  bool IsKeyDecl = ThisDeclID == FirstDeclID;
  bool IsFirstLocalDecl = false;

  uint64_t RedeclOffset = 0;

  // 0 indicates that this declaration was the only declaration of its entity,
  // and is used for space optimization.
  if (FirstDeclID == 0) {
    FirstDeclID = ThisDeclID;
    IsKeyDecl = true;
    IsFirstLocalDecl = true;
  } else if (unsigned N = Record.readInt()) {
    // This declaration was the first local declaration, but may have imported
    // other declarations.
    IsKeyDecl = N == 1;
    IsFirstLocalDecl = true;

    // We have some declarations that must be before us in our redeclaration
    // chain. Read them now, and remember that we ought to merge with one of
    // them.
    // FIXME: Provide a known merge target to the second and subsequent such
    // declaration.
    for (unsigned I = 0; I != N - 1; ++I)
      MergeWith = readDecl();

    RedeclOffset = ReadLocalOffset();
  } else {
    // This declaration was not the first local declaration. Read the first
    // local declaration now, to trigger the import of other redeclarations.
    (void)readDecl();
  }

  auto *FirstDecl = cast_or_null<T>(Reader.GetDecl(FirstDeclID));
  if (FirstDecl != D) {
    // We delay loading of the redeclaration chain to avoid deeply nested calls.
    // We temporarily set the first (canonical) declaration as the previous one
    // which is the one that matters and mark the real previous DeclID to be
    // loaded & attached later on.
    D->RedeclLink = Redeclarable<T>::PreviousDeclLink(FirstDecl);
    D->First = FirstDecl->getCanonicalDecl();
  }

  auto *DAsT = static_cast<T *>(D);

  // Note that we need to load local redeclarations of this decl and build a
  // decl chain for them. This must happen *after* we perform the preloading
  // above; this ensures that the redeclaration chain is built in the correct
  // order.
  if (IsFirstLocalDecl)
    Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset));

  return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl);
}

/// Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template <typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase,
                                      RedeclarableResult &Redecl,
                                      DeclID TemplatePatternID) {
  // If modules are not available, there is no reason to perform this merge.
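// ---- Illustrative sketch (standalone, not part of this file) --------------
// VisitRedeclarable above relies on a small encoding trick: a stored
// FirstDeclID of 0 means "this was the only declaration of its entity", so no
// extra data follows; otherwise a count N says how many earlier declarations
// must be pulled in first, with N == 1 marking the key declaration.  This is
// a simplified model of that branching (the real record also carries an
// offset to the local redeclaration chain, omitted here); all names are
// hypothetical.
#include <cstdint>
#include <vector>

namespace redecl_record_sketch {

struct RedeclHeader {
  uint32_t FirstDeclID = 0;              // 0 = sole declaration
  bool IsKeyDecl = false;                // first declaration of the entity
  std::vector<uint32_t> MergeCandidates; // decls that must precede us
};

inline RedeclHeader readHeader(const std::vector<uint32_t> &Record,
                               size_t &Idx, uint32_t ThisDeclID) {
  RedeclHeader H;
  H.FirstDeclID = Record[Idx++];
  if (H.FirstDeclID == 0) {
    H.FirstDeclID = ThisDeclID; // we are the whole chain
    H.IsKeyDecl = true;
  } else if (uint32_t N = Record[Idx++]) {
    H.IsKeyDecl = (N == 1);     // first local decl, possibly with imports
    for (uint32_t I = 0; I != N - 1; ++I)
      H.MergeCandidates.push_back(Record[Idx++]);
  }
  return H;
}

} // namespace redecl_record_sketch
// ---- end sketch ------------------------------------------------------------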
if (!Reader.getContext().getLangOpts().Modules) return; // If we're not the canonical declaration, we don't need to merge. if (!DBase->isFirstDecl()) return; auto *D = static_cast(DBase); if (auto *Existing = Redecl.getKnownMergeTarget()) // We already know of an existing declaration we should merge with. mergeRedeclarable(D, cast(Existing), Redecl, TemplatePatternID); else if (FindExistingResult ExistingRes = findExisting(D)) if (T *Existing = ExistingRes) mergeRedeclarable(D, Existing, Redecl, TemplatePatternID); } /// "Cast" to type T, asserting if we don't have an implicit conversion. /// We use this to put code in a template that will only be valid for certain /// instantiations. template static T assert_cast(T t) { return t; } template static T assert_cast(...) { llvm_unreachable("bad assert_cast"); } /// Merge together the pattern declarations from two template /// declarations. void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl) { auto *DPattern = D->getTemplatedDecl(); auto *ExistingPattern = Existing->getTemplatedDecl(); RedeclarableResult Result(/*MergeWith*/ ExistingPattern, DPattern->getCanonicalDecl()->getGlobalID(), IsKeyDecl); if (auto *DClass = dyn_cast(DPattern)) { // Merge with any existing definition. // FIXME: This is duplicated in several places. Refactor. auto *ExistingClass = cast(ExistingPattern)->getCanonicalDecl(); if (auto *DDD = DClass->DefinitionData) { if (ExistingClass->DefinitionData) { MergeDefinitionData(ExistingClass, std::move(*DDD)); } else { ExistingClass->DefinitionData = DClass->DefinitionData; // We may have skipped this before because we thought that DClass // was the canonical declaration. Reader.PendingDefinitions.insert(DClass); } } DClass->DefinitionData = ExistingClass->DefinitionData; return mergeRedeclarable(DClass, cast(ExistingPattern), Result); } if (auto *DFunction = dyn_cast(DPattern)) return mergeRedeclarable(DFunction, cast(ExistingPattern), Result); if (auto *DVar = dyn_cast(DPattern)) return mergeRedeclarable(DVar, cast(ExistingPattern), Result); if (auto *DAlias = dyn_cast(DPattern)) return mergeRedeclarable(DAlias, cast(ExistingPattern), Result); llvm_unreachable("merged an unknown kind of redeclarable template"); } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity. template void ASTDeclReader::mergeRedeclarable(Redeclarable *DBase, T *Existing, RedeclarableResult &Redecl, DeclID TemplatePatternID) { auto *D = static_cast(DBase); T *ExistingCanon = Existing->getCanonicalDecl(); T *DCanon = D->getCanonicalDecl(); if (ExistingCanon != DCanon) { assert(DCanon->getGlobalID() == Redecl.getFirstID() && "already merged this declaration"); // Have our redeclaration link point back at the canonical declaration // of the existing declaration, so that this declaration has the // appropriate canonical declaration. D->RedeclLink = Redeclarable::PreviousDeclLink(ExistingCanon); D->First = ExistingCanon; ExistingCanon->Used |= D->Used; D->Used = false; // When we merge a namespace, update its pointer to the first namespace. // We cannot have loaded any redeclarations of this declaration yet, so // there's nothing else that needs to be updated. if (auto *Namespace = dyn_cast(D)) Namespace->AnonOrFirstNamespaceAndInline.setPointer( assert_cast(ExistingCanon)); // When we merge a template, merge its pattern. 
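// ---- Illustrative sketch (standalone, not part of this file) --------------
// The merge step above folds a newly-read chain into an already-known one by
// pointing the new declaration's "previous" link at the existing canonical
// declaration and OR-ing flags such as Used into it.  The toy node type below
// models that re-parenting; it is not the real Redeclarable machinery.
namespace chain_merge_sketch {

struct DeclNode {
  DeclNode *Previous = nullptr; // link toward the canonical declaration
  DeclNode *First = nullptr;    // cached canonical declaration
  bool Used = false;

  DeclNode *canonical() { return First ? First : this; }
};

inline void mergeInto(DeclNode &NewDecl, DeclNode &Existing) {
  DeclNode *ExistingCanon = Existing.canonical();
  if (ExistingCanon == NewDecl.canonical())
    return;                         // already the same entity
  NewDecl.Previous = ExistingCanon; // splice the new chain behind the old one
  NewDecl.First = ExistingCanon;
  ExistingCanon->Used |= NewDecl.Used; // flags live on the canonical decl
  NewDecl.Used = false;
}

} // namespace chain_merge_sketch
// ---- end sketch ------------------------------------------------------------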
if (auto *DTemplate = dyn_cast(D)) mergeTemplatePattern( DTemplate, assert_cast(ExistingCanon), TemplatePatternID, Redecl.isKeyDecl()); // If this declaration is a key declaration, make a note of that. if (Redecl.isKeyDecl()) Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID()); } } /// ODR-like semantics for C/ObjC allow us to merge tag types and a structural /// check in Sema guarantees the types can be merged (see C11 6.2.7/1 or C89 /// 6.1.2.6/1). Although most merging is done in Sema, we need to guarantee /// that some types are mergeable during deserialization, otherwise name /// lookup fails. This is the case for EnumConstantDecl. static bool allowODRLikeMergeInC(NamedDecl *ND) { if (!ND) return false; // TODO: implement merge for other necessary decls. if (isa(ND)) return true; return false; } /// Attempts to merge LifetimeExtendedTemporaryDecl with /// identical class definitions from two different modules. void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; LifetimeExtendedTemporaryDecl *LETDecl = D; LifetimeExtendedTemporaryDecl *&LookupResult = Reader.LETemporaryForMerging[std::make_pair( LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; if (LookupResult) Reader.getContext().setPrimaryMergedDecl(LETDecl, LookupResult->getCanonicalDecl()); else LookupResult = LETDecl; } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of /// identical class definitions from two different modules. template void ASTDeclReader::mergeMergeable(Mergeable *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // ODR-based merging is performed in C++ and in some cases (tag types) in C. // Note that C identically-named things in different translation units are // not redeclarations, but may still have compatible types, where ODR-like // semantics may apply. 
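// ---- Illustrative sketch (standalone, not part of this file) --------------
// mergeMergeable(LifetimeExtendedTemporaryDecl*) above dedupes temporaries by
// the pair (extending declaration, mangling number): the first temporary seen
// for a key becomes canonical and later reads are redirected to it.  A plain
// std::map stands in for the reader's LETemporaryForMerging table; all names
// below are hypothetical.
#include <map>
#include <utility>

namespace temporary_merge_sketch {

struct Temporary {
  int ExtendingDeclID; // stand-in for the extending ValueDecl
  unsigned ManglingNumber;
};

class TemporaryMerger {
  std::map<std::pair<int, unsigned>, Temporary *> Canonical;

public:
  // Returns the canonical temporary for T's key; T itself if it is the first.
  Temporary *merge(Temporary *T) {
    auto Key = std::make_pair(T->ExtendingDeclID, T->ManglingNumber);
    auto It = Canonical.emplace(Key, T).first;
    return It->second;
  }
};

} // namespace temporary_merge_sketch
// ---- end sketch ------------------------------------------------------------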
if (!Reader.getContext().getLangOpts().CPlusPlus && !allowODRLikeMergeInC(dyn_cast(static_cast(D)))) return; if (FindExistingResult ExistingRes = findExisting(static_cast(D))) if (T *Existing = ExistingRes) Reader.getContext().setPrimaryMergedDecl(static_cast(D), Existing->getCanonicalDecl()); } void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) { VisitDecl(D); unsigned NumVars = D->varlist_size(); SmallVector Vars; Vars.reserve(NumVars); for (unsigned i = 0; i != NumVars; ++i) { Vars.push_back(Record.readExpr()); } D->setVars(Vars); } void ASTDeclReader::VisitOMPAllocateDecl(OMPAllocateDecl *D) { VisitDecl(D); unsigned NumVars = D->varlist_size(); unsigned NumClauses = D->clauselist_size(); SmallVector Vars; Vars.reserve(NumVars); for (unsigned i = 0; i != NumVars; ++i) { Vars.push_back(Record.readExpr()); } D->setVars(Vars); SmallVector Clauses; Clauses.reserve(NumClauses); for (unsigned I = 0; I != NumClauses; ++I) Clauses.push_back(Record.readOMPClause()); D->setClauses(Clauses); } void ASTDeclReader::VisitOMPRequiresDecl(OMPRequiresDecl * D) { VisitDecl(D); unsigned NumClauses = D->clauselist_size(); SmallVector Clauses; Clauses.reserve(NumClauses); for (unsigned I = 0; I != NumClauses; ++I) Clauses.push_back(Record.readOMPClause()); D->setClauses(Clauses); } void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { VisitValueDecl(D); D->setLocation(readSourceLocation()); Expr *In = Record.readExpr(); Expr *Out = Record.readExpr(); D->setCombinerData(In, Out); Expr *Combiner = Record.readExpr(); D->setCombiner(Combiner); Expr *Orig = Record.readExpr(); Expr *Priv = Record.readExpr(); D->setInitializerData(Orig, Priv); Expr *Init = Record.readExpr(); auto IK = static_cast(Record.readInt()); D->setInitializer(Init, IK); D->PrevDeclInScope = readDeclID(); } void ASTDeclReader::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { VisitValueDecl(D); D->setLocation(readSourceLocation()); Expr *MapperVarRefE = Record.readExpr(); D->setMapperVarRef(MapperVarRefE); D->VarName = Record.readDeclarationName(); D->PrevDeclInScope = readDeclID(); unsigned NumClauses = D->clauselist_size(); SmallVector Clauses; Clauses.reserve(NumClauses); for (unsigned I = 0; I != NumClauses; ++I) Clauses.push_back(Record.readOMPClause()); D->setClauses(Clauses); } void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); } //===----------------------------------------------------------------------===// // Attribute Reading //===----------------------------------------------------------------------===// namespace { class AttrReader { ASTRecordReader &Reader; public: AttrReader(ASTRecordReader &Reader) : Reader(Reader) {} uint64_t readInt() { return Reader.readInt(); } SourceRange readSourceRange() { return Reader.readSourceRange(); } SourceLocation readSourceLocation() { return Reader.readSourceLocation(); } Expr *readExpr() { return Reader.readExpr(); } std::string readString() { return Reader.readString(); } TypeSourceInfo *readTypeSourceInfo() { return Reader.readTypeSourceInfo(); } IdentifierInfo *readIdentifier() { return Reader.readIdentifier(); } VersionTuple readVersionTuple() { return Reader.readVersionTuple(); } OMPTraitInfo *readOMPTraitInfo() { return Reader.readOMPTraitInfo(); } template T *GetLocalDeclAs(uint32_t LocalID) { return Reader.GetLocalDeclAs(LocalID); } }; } Attr *ASTRecordReader::readAttr() { AttrReader Record(*this); auto V = Record.readInt(); if (!V) return nullptr; Attr *New = nullptr; // Kind is stored as a 1-based 
integer because 0 is used to indicate a null // Attr pointer. auto Kind = static_cast(V - 1); ASTContext &Context = getContext(); IdentifierInfo *AttrName = Record.readIdentifier(); IdentifierInfo *ScopeName = Record.readIdentifier(); SourceRange AttrRange = Record.readSourceRange(); SourceLocation ScopeLoc = Record.readSourceLocation(); unsigned ParsedKind = Record.readInt(); unsigned Syntax = Record.readInt(); unsigned SpellingIndex = Record.readInt(); AttributeCommonInfo Info(AttrName, ScopeName, AttrRange, ScopeLoc, AttributeCommonInfo::Kind(ParsedKind), AttributeCommonInfo::Syntax(Syntax), SpellingIndex); #include "clang/Serialization/AttrPCHRead.inc" assert(New && "Unable to decode attribute?"); return New; } /// Reads attributes from the current stream position. void ASTRecordReader::readAttributes(AttrVec &Attrs) { for (unsigned I = 0, E = readInt(); I != E; ++I) Attrs.push_back(readAttr()); } //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// /// Note that we have loaded the declaration with the given /// Index. /// /// This routine notes that this declaration has already been loaded, /// so that future GetDecl calls will return this declaration rather /// than trying to load a new declaration. inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) { assert(!DeclsLoaded[Index] && "Decl loaded twice?"); DeclsLoaded[Index] = D; } /// Determine whether the consumer will be interested in seeing /// this declaration (via HandleTopLevelDecl). /// /// This routine should return true for anything that might affect /// code generation, e.g., inline function definitions, Objective-C /// declarations with metadata, etc. static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) { // An ObjCMethodDecl is never considered as "interesting" because its // implementation container always is. // An ImportDecl or VarDecl imported from a module map module will get // emitted when we import the relevant module. if (isPartOfPerModuleInitializer(D)) { auto *M = D->getImportedOwningModule(); if (M && M->Kind == Module::ModuleMapModule && Ctx.DeclMustBeEmitted(D)) return false; } if (isa(D) || isa(D) || isa(D) || isa(D) || isa(D) || isa(D)) return true; if (isa(D) || isa(D) || isa(D) || isa(D) || isa(D)) return !D->getDeclContext()->isFunctionOrMethod(); if (const auto *Var = dyn_cast(D)) return Var->isFileVarDecl() && (Var->isThisDeclarationADefinition() == VarDecl::Definition || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var)); if (const auto *Func = dyn_cast(D)) return Func->doesThisDeclarationHaveABody() || HasBody; if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) return true; return false; } /// Get the correct cursor and offset for loading a declaration. 
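// ---- Illustrative sketch (standalone, not part of this file) --------------
// readAttr above stores the attribute kind as a 1-based value so that 0 can
// mean "no attribute at all".  The encode/decode pair below is a minimal,
// hypothetical model of that convention for any enum-like kind.
#include <cstdint>

namespace one_based_kind_sketch {

enum class Kind : uint32_t { First, Second, Third };

// 0 means "null"; otherwise the stored value is the kind plus one.
inline uint32_t encode(const Kind *K) {
  return K ? static_cast<uint32_t>(*K) + 1 : 0;
}

inline bool decode(uint32_t V, Kind &Out) {
  if (V == 0)
    return false;                 // null attribute
  Out = static_cast<Kind>(V - 1); // shift back to the real enumerator
  return true;
}

} // namespace one_based_kind_sketch
// ---- end sketch ------------------------------------------------------------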
ASTReader::RecordLocation ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) { GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID); assert(I != GlobalDeclMap.end() && "Corrupted global declaration map"); ModuleFile *M = I->second; const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { auto I = GlobalBitOffsetsMap.find(GlobalOffset); assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map"); return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset); } uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) { return LocalOffset + M.GlobalBitOffset; } static bool isSameTemplateParameterList(const ASTContext &C, const TemplateParameterList *X, const TemplateParameterList *Y); /// Determine whether two template parameters are similar enough /// that they may be used in declarations of the same template. static bool isSameTemplateParameter(const NamedDecl *X, const NamedDecl *Y) { if (X->getKind() != Y->getKind()) return false; if (const auto *TX = dyn_cast(X)) { const auto *TY = cast(Y); if (TX->isParameterPack() != TY->isParameterPack()) return false; if (TX->hasTypeConstraint() != TY->hasTypeConstraint()) return false; - if (TX->hasTypeConstraint()) { - const TypeConstraint *TXTC = TX->getTypeConstraint(); - const TypeConstraint *TYTC = TY->getTypeConstraint(); + const TypeConstraint *TXTC = TX->getTypeConstraint(); + const TypeConstraint *TYTC = TY->getTypeConstraint(); + if (!TXTC != !TYTC) + return false; + if (TXTC && TYTC) { if (TXTC->getNamedConcept() != TYTC->getNamedConcept()) return false; if (TXTC->hasExplicitTemplateArgs() != TYTC->hasExplicitTemplateArgs()) return false; if (TXTC->hasExplicitTemplateArgs()) { const auto *TXTCArgs = TXTC->getTemplateArgsAsWritten(); const auto *TYTCArgs = TYTC->getTemplateArgsAsWritten(); if (TXTCArgs->NumTemplateArgs != TYTCArgs->NumTemplateArgs) return false; llvm::FoldingSetNodeID XID, YID; for (const auto &ArgLoc : TXTCArgs->arguments()) ArgLoc.getArgument().Profile(XID, X->getASTContext()); for (const auto &ArgLoc : TYTCArgs->arguments()) ArgLoc.getArgument().Profile(YID, Y->getASTContext()); if (XID != YID) return false; } } return true; } if (const auto *TX = dyn_cast(X)) { const auto *TY = cast(Y); return TX->isParameterPack() == TY->isParameterPack() && TX->getASTContext().hasSameType(TX->getType(), TY->getType()); } const auto *TX = cast(X); const auto *TY = cast(Y); return TX->isParameterPack() == TY->isParameterPack() && isSameTemplateParameterList(TX->getASTContext(), TX->getTemplateParameters(), TY->getTemplateParameters()); } static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) { if (auto *NS = X->getAsNamespace()) return NS; if (auto *NAS = X->getAsNamespaceAlias()) return NAS->getNamespace(); return nullptr; } static bool isSameQualifier(const NestedNameSpecifier *X, const NestedNameSpecifier *Y) { if (auto *NSX = getNamespace(X)) { auto *NSY = getNamespace(Y); if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl()) return false; } else if (X->getKind() != Y->getKind()) return false; // FIXME: For namespaces and types, we're permitted to check that the entity // is named via the same tokens. We should probably do so. 
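// ---- Illustrative sketch (standalone, not part of this file) --------------
// The '-'/'+' hunk above changes isSameTemplateParameter to fetch both type
// constraints first, reject the pair when exactly one side has a constraint
// (!TXTC != !TYTC), and compare contents only when both exist.  The helper
// below shows the same "both null, or both present and equal" idiom on
// ordinary pointers; equalOrBothNull is a hypothetical name.
namespace null_safe_compare_sketch {

template <typename T, typename EqualFn>
bool equalOrBothNull(const T *A, const T *B, EqualFn Equal) {
  if (!A != !B)
    return false;         // exactly one side is missing: not the same
  if (A && B)
    return Equal(*A, *B); // both present: compare the payloads
  return true;            // both absent: trivially the same
}

} // namespace null_safe_compare_sketch
// ---- end sketch ------------------------------------------------------------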
switch (X->getKind()) { case NestedNameSpecifier::Identifier: if (X->getAsIdentifier() != Y->getAsIdentifier()) return false; break; case NestedNameSpecifier::Namespace: case NestedNameSpecifier::NamespaceAlias: // We've already checked that we named the same namespace. break; case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: if (X->getAsType()->getCanonicalTypeInternal() != Y->getAsType()->getCanonicalTypeInternal()) return false; break; case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: return true; } // Recurse into earlier portion of NNS, if any. auto *PX = X->getPrefix(); auto *PY = Y->getPrefix(); if (PX && PY) return isSameQualifier(PX, PY); return !PX && !PY; } /// Determine whether two template parameter lists are similar enough /// that they may be used in declarations of the same template. static bool isSameTemplateParameterList(const ASTContext &C, const TemplateParameterList *X, const TemplateParameterList *Y) { if (X->size() != Y->size()) return false; for (unsigned I = 0, N = X->size(); I != N; ++I) if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I))) return false; const Expr *XRC = X->getRequiresClause(); const Expr *YRC = Y->getRequiresClause(); if (!XRC != !YRC) return false; if (XRC) { llvm::FoldingSetNodeID XRCID, YRCID; XRC->Profile(XRCID, C, /*Canonical=*/true); YRC->Profile(YRCID, C, /*Canonical=*/true); if (XRCID != YRCID) return false; } return true; } /// Determine whether the attributes we can overload on are identical for A and /// B. Will ignore any overloadable attrs represented in the type of A and B. static bool hasSameOverloadableAttrs(const FunctionDecl *A, const FunctionDecl *B) { // Note that pass_object_size attributes are represented in the function's // ExtParameterInfo, so we don't need to check them here. llvm::FoldingSetNodeID Cand1ID, Cand2ID; auto AEnableIfAttrs = A->specific_attrs(); auto BEnableIfAttrs = B->specific_attrs(); for (auto Pair : zip_longest(AEnableIfAttrs, BEnableIfAttrs)) { Optional Cand1A = std::get<0>(Pair); Optional Cand2A = std::get<1>(Pair); // Return false if the number of enable_if attributes is different. if (!Cand1A || !Cand2A) return false; Cand1ID.clear(); Cand2ID.clear(); (*Cand1A)->getCond()->Profile(Cand1ID, A->getASTContext(), true); (*Cand2A)->getCond()->Profile(Cand2ID, B->getASTContext(), true); // Return false if any of the enable_if expressions of A and B are // different. if (Cand1ID != Cand2ID) return false; } return true; } /// Determine whether the two declarations refer to the same entity.pr static bool isSameEntity(NamedDecl *X, NamedDecl *Y) { assert(X->getDeclName() == Y->getDeclName() && "Declaration name mismatch!"); if (X == Y) return true; // Must be in the same context. // // Note that we can't use DeclContext::Equals here, because the DeclContexts // could be two different declarations of the same function. (We will fix the // semantic DC to refer to the primary definition after merging.) if (!declaresSameEntity(cast(X->getDeclContext()->getRedeclContext()), cast(Y->getDeclContext()->getRedeclContext()))) return false; // Two typedefs refer to the same entity if they have the same underlying // type. if (const auto *TypedefX = dyn_cast(X)) if (const auto *TypedefY = dyn_cast(Y)) return X->getASTContext().hasSameType(TypedefX->getUnderlyingType(), TypedefY->getUnderlyingType()); // Must have the same kind. if (X->getKind() != Y->getKind()) return false; // Objective-C classes and protocols with the same name always match. 
if (isa(X) || isa(X)) return true; if (isa(X)) { // No need to handle these here: we merge them when adding them to the // template. return false; } // Compatible tags match. if (const auto *TagX = dyn_cast(X)) { const auto *TagY = cast(Y); return (TagX->getTagKind() == TagY->getTagKind()) || ((TagX->getTagKind() == TTK_Struct || TagX->getTagKind() == TTK_Class || TagX->getTagKind() == TTK_Interface) && (TagY->getTagKind() == TTK_Struct || TagY->getTagKind() == TTK_Class || TagY->getTagKind() == TTK_Interface)); } // Functions with the same type and linkage match. // FIXME: This needs to cope with merging of prototyped/non-prototyped // functions, etc. if (const auto *FuncX = dyn_cast(X)) { const auto *FuncY = cast(Y); if (const auto *CtorX = dyn_cast(X)) { const auto *CtorY = cast(Y); if (CtorX->getInheritedConstructor() && !isSameEntity(CtorX->getInheritedConstructor().getConstructor(), CtorY->getInheritedConstructor().getConstructor())) return false; } if (FuncX->isMultiVersion() != FuncY->isMultiVersion()) return false; // Multiversioned functions with different feature strings are represented // as separate declarations. if (FuncX->isMultiVersion()) { const auto *TAX = FuncX->getAttr(); const auto *TAY = FuncY->getAttr(); assert(TAX && TAY && "Multiversion Function without target attribute"); if (TAX->getFeaturesStr() != TAY->getFeaturesStr()) return false; } ASTContext &C = FuncX->getASTContext(); const Expr *XRC = FuncX->getTrailingRequiresClause(); const Expr *YRC = FuncY->getTrailingRequiresClause(); if (!XRC != !YRC) return false; if (XRC) { llvm::FoldingSetNodeID XRCID, YRCID; XRC->Profile(XRCID, C, /*Canonical=*/true); YRC->Profile(YRCID, C, /*Canonical=*/true); if (XRCID != YRCID) return false; } auto GetTypeAsWritten = [](const FunctionDecl *FD) { // Map to the first declaration that we've already merged into this one. // The TSI of redeclarations might not match (due to calling conventions // being inherited onto the type but not the TSI), but the TSI type of // the first declaration of the function should match across modules. FD = FD->getCanonicalDecl(); return FD->getTypeSourceInfo() ? FD->getTypeSourceInfo()->getType() : FD->getType(); }; QualType XT = GetTypeAsWritten(FuncX), YT = GetTypeAsWritten(FuncY); if (!C.hasSameType(XT, YT)) { // We can get functions with different types on the redecl chain in C++17 // if they have differing exception specifications and at least one of // the excpetion specs is unresolved. auto *XFPT = XT->getAs(); auto *YFPT = YT->getAs(); if (C.getLangOpts().CPlusPlus17 && XFPT && YFPT && (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) || isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) && C.hasSameFunctionTypeIgnoringExceptionSpec(XT, YT)) return true; return false; } return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() && hasSameOverloadableAttrs(FuncX, FuncY); } // Variables with the same type and linkage match. if (const auto *VarX = dyn_cast(X)) { const auto *VarY = cast(Y); if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) { ASTContext &C = VarX->getASTContext(); if (C.hasSameType(VarX->getType(), VarY->getType())) return true; // We can get decls with different types on the redecl chain. Eg. // template struct S { static T Var[]; }; // #1 // template T S::Var[sizeof(T)]; // #2 // Only? happens when completing an incomplete array type. In this case // when comparing #1 and #2 we should go through their element type. 
const ArrayType *VarXTy = C.getAsArrayType(VarX->getType()); const ArrayType *VarYTy = C.getAsArrayType(VarY->getType()); if (!VarXTy || !VarYTy) return false; if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType()) return C.hasSameType(VarXTy->getElementType(), VarYTy->getElementType()); } return false; } // Namespaces with the same name and inlinedness match. if (const auto *NamespaceX = dyn_cast(X)) { const auto *NamespaceY = cast(Y); return NamespaceX->isInline() == NamespaceY->isInline(); } // Identical template names and kinds match if their template parameter lists // and patterns match. if (const auto *TemplateX = dyn_cast(X)) { const auto *TemplateY = cast(Y); return isSameEntity(TemplateX->getTemplatedDecl(), TemplateY->getTemplatedDecl()) && isSameTemplateParameterList(TemplateX->getASTContext(), TemplateX->getTemplateParameters(), TemplateY->getTemplateParameters()); } // Fields with the same name and the same type match. if (const auto *FDX = dyn_cast(X)) { const auto *FDY = cast(Y); // FIXME: Also check the bitwidth is odr-equivalent, if any. return X->getASTContext().hasSameType(FDX->getType(), FDY->getType()); } // Indirect fields with the same target field match. if (const auto *IFDX = dyn_cast(X)) { const auto *IFDY = cast(Y); return IFDX->getAnonField()->getCanonicalDecl() == IFDY->getAnonField()->getCanonicalDecl(); } // Enumerators with the same name match. if (isa(X)) // FIXME: Also check the value is odr-equivalent. return true; // Using shadow declarations with the same target match. if (const auto *USX = dyn_cast(X)) { const auto *USY = cast(Y); return USX->getTargetDecl() == USY->getTargetDecl(); } // Using declarations with the same qualifier match. (We already know that // the name matches.) if (const auto *UX = dyn_cast(X)) { const auto *UY = cast(Y); return isSameQualifier(UX->getQualifier(), UY->getQualifier()) && UX->hasTypename() == UY->hasTypename() && UX->isAccessDeclaration() == UY->isAccessDeclaration(); } if (const auto *UX = dyn_cast(X)) { const auto *UY = cast(Y); return isSameQualifier(UX->getQualifier(), UY->getQualifier()) && UX->isAccessDeclaration() == UY->isAccessDeclaration(); } if (const auto *UX = dyn_cast(X)) return isSameQualifier( UX->getQualifier(), cast(Y)->getQualifier()); // Namespace alias definitions with the same target match. if (const auto *NAX = dyn_cast(X)) { const auto *NAY = cast(Y); return NAX->getNamespace()->Equals(NAY->getNamespace()); } return false; } /// Find the context in which we should search for previous declarations when /// looking for declarations to merge. DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC) { if (auto *ND = dyn_cast(DC)) return ND->getOriginalNamespace(); if (auto *RD = dyn_cast(DC)) { // Try to dig out the definition. auto *DD = RD->DefinitionData; if (!DD) DD = RD->getCanonicalDecl()->DefinitionData; // If there's no definition yet, then DC's definition is added by an update // record, but we've not yet loaded that update record. In this case, we // commit to DC being the canonical definition now, and will fix this when // we load the update record. if (!DD) { DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD); RD->setCompleteDefinition(true); RD->DefinitionData = DD; RD->getCanonicalDecl()->DefinitionData = DD; // Track that we did this horrible thing so that we can fix it later. 
Reader.PendingFakeDefinitionData.insert( std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake)); } return DD->Definition; } if (auto *ED = dyn_cast(DC)) return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition() : nullptr; // We can see the TU here only if we have no Sema object. In that case, // there's no TU scope to look in, so using the DC alone is sufficient. if (auto *TU = dyn_cast(DC)) return TU; return nullptr; } ASTDeclReader::FindExistingResult::~FindExistingResult() { // Record that we had a typedef name for linkage whether or not we merge // with that declaration. if (TypedefNameForLinkage) { DeclContext *DC = New->getDeclContext()->getRedeclContext(); Reader.ImportedTypedefNamesForLinkage.insert( std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New)); return; } if (!AddResult || Existing) return; DeclarationName Name = New->getDeclName(); DeclContext *DC = New->getDeclContext()->getRedeclContext(); if (needsAnonymousDeclarationNumber(New)) { setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(), AnonymousDeclNumber, New); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name)) Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()] .push_back(New); } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { // Add the declaration to its redeclaration context so later merging // lookups will find it. MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true); } } /// Find the declaration that should be merged into, given the declaration found /// by name lookup. If we're merging an anonymous declaration within a typedef, /// we need a matching typedef, and we merge with the type inside it. static NamedDecl *getDeclForMerging(NamedDecl *Found, bool IsTypedefNameForLinkage) { if (!IsTypedefNameForLinkage) return Found; // If we found a typedef declaration that gives a name to some other // declaration, then we want that inner declaration. Declarations from // AST files are handled via ImportedTypedefNamesForLinkage. if (Found->isFromASTFile()) return nullptr; if (auto *TND = dyn_cast(Found)) return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true); return nullptr; } /// Find the declaration to use to populate the anonymous declaration table /// for the given lexical DeclContext. We only care about finding local /// definitions of the context; we'll merge imported ones as we go. DeclContext * ASTDeclReader::getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC) { // For classes, we track the definition as we merge. if (auto *RD = dyn_cast(LexicalDC)) { auto *DD = RD->getCanonicalDecl()->DefinitionData; return DD ? DD->Definition : nullptr; } // For anything else, walk its merged redeclarations looking for a definition. // Note that we can't just call getDefinition here because the redeclaration // chain isn't wired up. for (auto *D : merged_redecls(cast(LexicalDC))) { if (auto *FD = dyn_cast(D)) if (FD->isThisDeclarationADefinition()) return FD; if (auto *MD = dyn_cast(D)) if (MD->isThisDeclarationADefinition()) return MD; } // No merged definition yet. return nullptr; } NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index) { // If the lexical context has been merged, look into the now-canonical // definition. auto *CanonDC = cast(DC)->getCanonicalDecl(); // If we've seen this before, return the canonical declaration. 
auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index < Previous.size() && Previous[Index]) return Previous[Index]; // If this is the first time, but we have parsed a declaration of the context, // build the anonymous declaration list from the parsed declaration. auto *PrimaryDC = getPrimaryDCForAnonymousDecl(DC); if (PrimaryDC && !cast(PrimaryDC)->isFromASTFile()) { numberAnonymousDeclsWithin(PrimaryDC, [&](NamedDecl *ND, unsigned Number) { if (Previous.size() == Number) Previous.push_back(cast(ND->getCanonicalDecl())); else Previous[Number] = cast(ND->getCanonicalDecl()); }); } return Index < Previous.size() ? Previous[Index] : nullptr; } void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D) { auto *CanonDC = cast(DC)->getCanonicalDecl(); auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index >= Previous.size()) Previous.resize(Index + 1); if (!Previous[Index]) Previous[Index] = D; } ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) { DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage : D->getDeclName(); if (!Name && !needsAnonymousDeclarationNumber(D)) { // Don't bother trying to find unnamed declarations that are in // unmergeable contexts. FindExistingResult Result(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); Result.suppress(); return Result; } DeclContext *DC = D->getDeclContext()->getRedeclContext(); if (TypedefNameForLinkage) { auto It = Reader.ImportedTypedefNamesForLinkage.find( std::make_pair(DC, TypedefNameForLinkage)); if (It != Reader.ImportedTypedefNamesForLinkage.end()) if (isSameEntity(It->second, D)) return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber, TypedefNameForLinkage); // Go on to check in other places in case an existing typedef name // was not imported. } if (needsAnonymousDeclarationNumber(D)) { // This is an anonymous declaration that we may need to merge. Look it up // in its context by number. if (auto *Existing = getAnonymousDeclForMerging( Reader, D->getLexicalDeclContext(), AnonymousDeclNumber)) if (isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { IdentifierResolver &IdResolver = Reader.getIdResolver(); // Temporarily consider the identifier to be up-to-date. We don't want to // cause additional lookups here. 
class UpToDateIdentifierRAII { IdentifierInfo *II; bool WasOutToDate = false; public: explicit UpToDateIdentifierRAII(IdentifierInfo *II) : II(II) { if (II) { WasOutToDate = II->isOutOfDate(); if (WasOutToDate) II->setOutOfDate(false); } } ~UpToDateIdentifierRAII() { if (WasOutToDate) II->setOutOfDate(true); } } UpToDate(Name.getAsIdentifierInfo()); for (IdentifierResolver::iterator I = IdResolver.begin(Name), IEnd = IdResolver.end(); I != IEnd; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { DeclContext::lookup_result R = MergeDC->noload_lookup(Name); for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else { // Not in a mergeable context. return FindExistingResult(Reader); } // If this declaration is from a merged context, make a note that we need to // check that the canonical definition of that context contains the decl. // // FIXME: We should do something similar if we merge two definitions of the // same template specialization into the same CXXRecordDecl. auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext()); if (MergedDCIt != Reader.MergedDeclContexts.end() && MergedDCIt->second == D->getDeclContext()) Reader.PendingOdrMergeChecks.push_back(D); return FindExistingResult(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); } template Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable *D) { return D->RedeclLink.getLatestNotUpdated(); } Decl *ASTDeclReader::getMostRecentDeclImpl(...) { llvm_unreachable("getMostRecentDecl on non-redeclarable declaration"); } Decl *ASTDeclReader::getMostRecentDecl(Decl *D) { assert(D); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ return getMostRecentDeclImpl(cast(D)); #include "clang/AST/DeclNodes.inc" } llvm_unreachable("unknown decl kind"); } Decl *ASTReader::getMostRecentExistingDecl(Decl *D) { return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl()); } template void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { D->RedeclLink.setPrevious(cast(Previous)); D->First = cast(Previous)->First; } namespace clang { template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *VD = static_cast(D); auto *PrevVD = cast(Previous); D->RedeclLink.setPrevious(PrevVD); D->First = PrevVD->First; // We should keep at most one definition on the chain. // FIXME: Cache the definition once we've found it. Building a chain with // N definitions currently takes O(N^2) time here. 
if (VD->isThisDeclarationADefinition() == VarDecl::Definition) { for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) { if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) { Reader.mergeDefinitionVisibility(CurD, VD); VD->demoteThisDefinitionToDeclaration(); break; } } } } static bool isUndeducedReturnType(QualType T) { auto *DT = T->getContainedDeducedType(); return DT && !DT->isDeduced(); } template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *FD = static_cast(D); auto *PrevFD = cast(Previous); FD->RedeclLink.setPrevious(PrevFD); FD->First = PrevFD->First; // If the previous declaration is an inline function declaration, then this // declaration is too. if (PrevFD->isInlined() != FD->isInlined()) { // FIXME: [dcl.fct.spec]p4: // If a function with external linkage is declared inline in one // translation unit, it shall be declared inline in all translation // units in which it appears. // // Be careful of this case: // // module A: // template struct X { void f(); }; // template inline void X::f() {} // // module B instantiates the declaration of X::f // module C instantiates the definition of X::f // // If module B and C are merged, we do not have a violation of this rule. FD->setImplicitlyInline(true); } auto *FPT = FD->getType()->getAs(); auto *PrevFPT = PrevFD->getType()->getAs(); if (FPT && PrevFPT) { // If we need to propagate an exception specification along the redecl // chain, make a note of that so that we can do so later. bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType()); bool WasUnresolved = isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType()); if (IsUnresolved != WasUnresolved) Reader.PendingExceptionSpecUpdates.insert( {Canon, IsUnresolved ? PrevFD : FD}); // If we need to propagate a deduced return type along the redecl chain, // make a note of that so that we can do it later. bool IsUndeduced = isUndeducedReturnType(FPT->getReturnType()); bool WasUndeduced = isUndeducedReturnType(PrevFPT->getReturnType()); if (IsUndeduced != WasUndeduced) Reader.PendingDeducedTypeUpdates.insert( {cast(Canon), (IsUndeduced ? PrevFPT : FPT)->getReturnType()}); } } } // namespace clang void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) { llvm_unreachable("attachPreviousDecl on non-redeclarable declaration"); } /// Inherit the default template argument from \p From to \p To. Returns /// \c false if there is no default template for \p From. 
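// Illustrative sketch (not from the LLVM tree): inheritDefaultTemplateArgument
// below copies a default argument from a parameter of the previous template
// declaration onto the corresponding parameter of the merged one. This is a
// simplified stand-alone version of walking two parallel parameter lists (the
// real code dispatches on the three template-parameter kinds); all names are
// invented for the example.
#if 0
#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

struct Param {
  std::optional<int> DefaultArg;
  bool Inherited = false;
};

void inheritDefaults(const std::vector<Param> &From, std::vector<Param> &To) {
  assert(From.size() == To.size() && "merged mismatched templates?");
  for (std::size_t I = 0; I != From.size(); ++I)
    if (From[I].DefaultArg && !To[I].DefaultArg) {
      To[I].DefaultArg = From[I].DefaultArg; // inherit from the previous decl
      To[I].Inherited = true;
    }
}

int main() {
  std::vector<Param> Prev = {{42, false}, {std::nullopt, false}};
  std::vector<Param> Cur = {{std::nullopt, false}, {std::nullopt, false}};
  inheritDefaults(Prev, Cur);
  assert(Cur[0].DefaultArg == 42 && Cur[0].Inherited);
  assert(!Cur[1].DefaultArg);
}
#endif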
template static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From, Decl *ToD) { auto *To = cast(ToD); if (!From->hasDefaultArgument()) return false; To->setInheritedDefaultArgument(Context, From); return true; } static void inheritDefaultTemplateArguments(ASTContext &Context, TemplateDecl *From, TemplateDecl *To) { auto *FromTP = From->getTemplateParameters(); auto *ToTP = To->getTemplateParameters(); assert(FromTP->size() == ToTP->size() && "merged mismatched templates?"); for (unsigned I = 0, N = FromTP->size(); I != N; ++I) { NamedDecl *FromParam = FromTP->getParam(I); NamedDecl *ToParam = ToTP->getParam(I); if (auto *FTTP = dyn_cast(FromParam)) inheritDefaultTemplateArgument(Context, FTTP, ToParam); else if (auto *FNTTP = dyn_cast(FromParam)) inheritDefaultTemplateArgument(Context, FNTTP, ToParam); else inheritDefaultTemplateArgument( Context, cast(FromParam), ToParam); } } void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon) { assert(D && Previous); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachPreviousDeclImpl(Reader, cast(D), Previous, Canon); \ break; #include "clang/AST/DeclNodes.inc" } // If the declaration was visible in one module, a redeclaration of it in // another module remains visible even if it wouldn't be visible by itself. // // FIXME: In this case, the declaration should only be visible if a module // that makes it visible has been imported. D->IdentifierNamespace |= Previous->IdentifierNamespace & (Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type); // If the declaration declares a template, it may inherit default arguments // from the previous declaration. if (auto *TD = dyn_cast(D)) inheritDefaultTemplateArguments(Reader.getContext(), cast(Previous), TD); } template void ASTDeclReader::attachLatestDeclImpl(Redeclarable *D, Decl *Latest) { D->RedeclLink.setLatest(cast(Latest)); } void ASTDeclReader::attachLatestDeclImpl(...) { llvm_unreachable("attachLatestDecl on non-redeclarable declaration"); } void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) { assert(D && Latest); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachLatestDeclImpl(cast(D), Latest); \ break; #include "clang/AST/DeclNodes.inc" } } template void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable *D) { D->RedeclLink.markIncomplete(); } void ASTDeclReader::markIncompleteDeclChainImpl(...) { llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration"); } void ASTReader::markIncompleteDeclChain(Decl *D) { switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ ASTDeclReader::markIncompleteDeclChainImpl(cast(D)); \ break; #include "clang/AST/DeclNodes.inc" } } /// Read the declaration at the given offset from the AST file. Decl *ASTReader::ReadDeclRecord(DeclID ID) { unsigned Index = ID - NUM_PREDEF_DECL_IDS; SourceLocation DeclLoc; RecordLocation Loc = DeclCursorForID(ID, DeclLoc); llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor; // Keep track of where we are in the stream, then jump back there // after reading this declaration. SavedStreamPosition SavedPosition(DeclsCursor); ReadingKindTracker ReadingKind(Read_Decl, *this); // Note that we are loading a declaration record. 
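// Illustrative sketch (not from the LLVM tree): SavedStreamPosition above is an
// RAII guard that remembers the bitstream cursor and rewinds it once the
// declaration has been read, so a nested read does not disturb the outer one.
// The same idea over a plain std::istream; names are invented for the example.
#if 0
#include <cassert>
#include <sstream>

class SavedStreamPosition {
  std::istream &In;
  std::streampos Pos;

public:
  explicit SavedStreamPosition(std::istream &S) : In(S), Pos(S.tellg()) {}
  ~SavedStreamPosition() { In.seekg(Pos); } // restore on scope exit
};

int main() {
  std::istringstream In("abcdef");
  char C;
  In.get(C); // consume 'a'; cursor now at index 1
  {
    SavedStreamPosition Saved(In);
    In.seekg(4); // jump elsewhere and read
    In.get(C);
    assert(C == 'e');
  }
  In.get(C); // back at the saved position
  assert(C == 'b');
}
#endif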
Deserializing ADecl(this); auto Fail = [](const char *what, llvm::Error &&Err) { llvm::report_fatal_error(Twine("ASTReader::readDeclRecord failed ") + what + ": " + toString(std::move(Err))); }; if (llvm::Error JumpFailed = DeclsCursor.JumpToBit(Loc.Offset)) Fail("jumping", std::move(JumpFailed)); ASTRecordReader Record(*this, *Loc.F); ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc); Expected MaybeCode = DeclsCursor.ReadCode(); if (!MaybeCode) Fail("reading code", MaybeCode.takeError()); unsigned Code = MaybeCode.get(); ASTContext &Context = getContext(); Decl *D = nullptr; Expected MaybeDeclCode = Record.readRecord(DeclsCursor, Code); if (!MaybeDeclCode) llvm::report_fatal_error( "ASTReader::readDeclRecord failed reading decl code: " + toString(MaybeDeclCode.takeError())); switch ((DeclCode)MaybeDeclCode.get()) { case DECL_CONTEXT_LEXICAL: case DECL_CONTEXT_VISIBLE: llvm_unreachable("Record cannot be de-serialized with readDeclRecord"); case DECL_TYPEDEF: D = TypedefDecl::CreateDeserialized(Context, ID); break; case DECL_TYPEALIAS: D = TypeAliasDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM: D = EnumDecl::CreateDeserialized(Context, ID); break; case DECL_RECORD: D = RecordDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM_CONSTANT: D = EnumConstantDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION: D = FunctionDecl::CreateDeserialized(Context, ID); break; case DECL_LINKAGE_SPEC: D = LinkageSpecDecl::CreateDeserialized(Context, ID); break; case DECL_EXPORT: D = ExportDecl::CreateDeserialized(Context, ID); break; case DECL_LABEL: D = LabelDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE: D = NamespaceDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE_ALIAS: D = NamespaceAliasDecl::CreateDeserialized(Context, ID); break; case DECL_USING: D = UsingDecl::CreateDeserialized(Context, ID); break; case DECL_USING_PACK: D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_USING_SHADOW: D = UsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_CONSTRUCTOR_USING_SHADOW: D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_USING_DIRECTIVE: D = UsingDirectiveDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_VALUE: D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_TYPENAME: D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_RECORD: D = CXXRecordDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_DEDUCTION_GUIDE: D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_METHOD: D = CXXMethodDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONSTRUCTOR: D = CXXConstructorDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_CXX_DESTRUCTOR: D = CXXDestructorDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONVERSION: D = CXXConversionDecl::CreateDeserialized(Context, ID); break; case DECL_ACCESS_SPEC: D = AccessSpecDecl::CreateDeserialized(Context, ID); break; case DECL_FRIEND: D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_FRIEND_TEMPLATE: D = FriendTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE: D = ClassTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE_SPECIALIZATION: D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case 
DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION: D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE: D = VarTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_SPECIALIZATION: D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION: D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION: D = ClassScopeFunctionSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION_TEMPLATE: D = FunctionTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_TEMPLATE_TYPE_PARM: { bool HasTypeConstraint = Record.readInt(); D = TemplateTypeParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_NON_TYPE_TEMPLATE_PARM: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID, Record.readInt(), HasTypeConstraint); break; } case DECL_TEMPLATE_TEMPLATE_PARM: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID); break; case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_TYPE_ALIAS_TEMPLATE: D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CONCEPT: D = ConceptDecl::CreateDeserialized(Context, ID); break; case DECL_REQUIRES_EXPR_BODY: D = RequiresExprBodyDecl::CreateDeserialized(Context, ID); break; case DECL_STATIC_ASSERT: D = StaticAssertDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_METHOD: D = ObjCMethodDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_INTERFACE: D = ObjCInterfaceDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IVAR: D = ObjCIvarDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROTOCOL: D = ObjCProtocolDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_AT_DEFS_FIELD: D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY: D = ObjCCategoryDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY_IMPL: D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IMPLEMENTATION: D = ObjCImplementationDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_COMPATIBLE_ALIAS: D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROPERTY: D = ObjCPropertyDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROPERTY_IMPL: D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID); break; case DECL_FIELD: D = FieldDecl::CreateDeserialized(Context, ID); break; case DECL_INDIRECTFIELD: D = IndirectFieldDecl::CreateDeserialized(Context, ID); break; case DECL_VAR: D = VarDecl::CreateDeserialized(Context, ID); break; case DECL_IMPLICIT_PARAM: D = ImplicitParamDecl::CreateDeserialized(Context, ID); break; case DECL_PARM_VAR: D = ParmVarDecl::CreateDeserialized(Context, ID); break; case DECL_DECOMPOSITION: D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_BINDING: D = BindingDecl::CreateDeserialized(Context, ID); break; case DECL_FILE_SCOPE_ASM: D = FileScopeAsmDecl::CreateDeserialized(Context, ID); break; case DECL_BLOCK: D = BlockDecl::CreateDeserialized(Context, ID); break; case 
DECL_MS_PROPERTY: D = MSPropertyDecl::CreateDeserialized(Context, ID); break; case DECL_MS_GUID: D = MSGuidDecl::CreateDeserialized(Context, ID); break; case DECL_CAPTURED: D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_CXX_BASE_SPECIFIERS: Error("attempt to read a C++ base-specifier record as a declaration"); return nullptr; case DECL_CXX_CTOR_INITIALIZERS: Error("attempt to read a C++ ctor initializer record as a declaration"); return nullptr; case DECL_IMPORT: // Note: last entry of the ImportDecl record is the number of stored source // locations. D = ImportDecl::CreateDeserialized(Context, ID, Record.back()); break; case DECL_OMP_THREADPRIVATE: D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_OMP_ALLOCATE: { unsigned NumVars = Record.readInt(); unsigned NumClauses = Record.readInt(); D = OMPAllocateDecl::CreateDeserialized(Context, ID, NumVars, NumClauses); break; } case DECL_OMP_REQUIRES: D = OMPRequiresDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_OMP_DECLARE_REDUCTION: D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID); break; case DECL_OMP_DECLARE_MAPPER: D = OMPDeclareMapperDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_OMP_CAPTUREDEXPR: D = OMPCapturedExprDecl::CreateDeserialized(Context, ID); break; case DECL_PRAGMA_COMMENT: D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_PRAGMA_DETECT_MISMATCH: D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_EMPTY: D = EmptyDecl::CreateDeserialized(Context, ID); break; case DECL_LIFETIME_EXTENDED_TEMPORARY: D = LifetimeExtendedTemporaryDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_TYPE_PARAM: D = ObjCTypeParamDecl::CreateDeserialized(Context, ID); break; } assert(D && "Unknown declaration reading AST file"); LoadedDecl(Index, D); // Set the DeclContext before doing any deserialization, to make sure internal // calls to Decl::getASTContext() by Decl's methods will find the // TranslationUnitDecl without crashing. D->setDeclContext(Context.getTranslationUnitDecl()); Reader.Visit(D); // If this declaration is also a declaration context, get the // offsets for its tables of lexical and visible declarations. if (auto *DC = dyn_cast(D)) { std::pair Offsets = Reader.VisitDeclContext(DC); if (Offsets.first && ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC)) return nullptr; if (Offsets.second && ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID)) return nullptr; } assert(Record.getIdx() == Record.size()); // Load any relevant update records. PendingUpdateRecords.push_back( PendingUpdateRecord(ID, D, /*JustLoaded=*/true)); // Load the categories after recursive loading is finished. if (auto *Class = dyn_cast(D)) // If we already have a definition when deserializing the ObjCInterfaceDecl, // we put the Decl in PendingDefinitions so we can pull the categories here. if (Class->isThisDeclarationADefinition() || PendingDefinitions.count(Class)) loadObjCCategories(ID, Class); // If we have deserialized a declaration that has a definition the // AST consumer might need to know about, queue it. // We don't pass it to the consumer immediately because we may be in recursive // loading, and some declarations may still be initializing. 
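// Illustrative sketch (not from the LLVM tree): freshly read declarations are
// queued in PotentiallyInterestingDecls and only handed to the consumer later,
// and PassInterestingDeclsToConsumer guards against re-entering that drain
// loop. A minimal version of the defer-then-drain pattern (the real code uses
// llvm::SaveAndRestore for the guard); all names are invented for the example.
#if 0
#include <cassert>
#include <deque>
#include <functional>

struct PendingQueue {
  std::deque<int> Queue;
  bool Draining = false;

  void enqueue(int X) { Queue.push_back(X); }

  void drain(const std::function<void(int)> &Consume) {
    if (Draining)
      return; // a drain further up the stack will pick these items up
    Draining = true;
    while (!Queue.empty()) {
      int X = Queue.front();
      Queue.pop_front();
      Consume(X); // may enqueue more work; the loop keeps going
    }
    Draining = false;
  }
};

int main() {
  PendingQueue P;
  P.enqueue(1);
  int Sum = 0;
  P.drain([&](int X) {
    Sum += X;
    if (X == 1)
      P.enqueue(2); // discovered while consuming the first item
  });
  assert(Sum == 3);
}
#endif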
PotentiallyInterestingDecls.push_back( InterestingDecl(D, Reader.hasPendingBody())); return D; } void ASTReader::PassInterestingDeclsToConsumer() { assert(Consumer); if (PassingDeclsToConsumer) return; // Guard variable to avoid recursively redoing the process of passing // decls to consumer. SaveAndRestore GuardPassingDeclsToConsumer(PassingDeclsToConsumer, true); // Ensure that we've loaded all potentially-interesting declarations // that need to be eagerly loaded. for (auto ID : EagerlyDeserializedDecls) GetDecl(ID); EagerlyDeserializedDecls.clear(); while (!PotentiallyInterestingDecls.empty()) { InterestingDecl D = PotentiallyInterestingDecls.front(); PotentiallyInterestingDecls.pop_front(); if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody())) PassInterestingDeclToConsumer(D.getDecl()); } } void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // The declaration may have been modified by files later in the chain. // If this is the case, read the record containing the updates from each file // and pass it to ASTDeclReader to make the modifications. serialization::GlobalDeclID ID = Record.ID; Decl *D = Record.D; ProcessingUpdatesRAIIObj ProcessingUpdates(*this); DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID); SmallVector PendingLazySpecializationIDs; if (UpdI != DeclUpdateOffsets.end()) { auto UpdateOffsets = std::move(UpdI->second); DeclUpdateOffsets.erase(UpdI); // Check if this decl was interesting to the consumer. If we just loaded // the declaration, then we know it was interesting and we skip the call // to isConsumerInterestedIn because it is unsafe to call in the // current ASTReader state. bool WasInteresting = Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false); for (auto &FileAndOffset : UpdateOffsets) { ModuleFile *F = FileAndOffset.first; uint64_t Offset = FileAndOffset.second; llvm::BitstreamCursor &Cursor = F->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(Offset)) // FIXME don't do a fatal error. llvm::report_fatal_error( "ASTReader::loadDeclUpdateRecords failed jumping: " + toString(std::move(JumpFailed))); Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( "ASTReader::loadDeclUpdateRecords failed reading code: " + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); ASTRecordReader Record(*this, *F); if (Expected MaybeRecCode = Record.readRecord(Cursor, Code)) assert(MaybeRecCode.get() == DECL_UPDATES && "Expected DECL_UPDATES record!"); else llvm::report_fatal_error( "ASTReader::loadDeclUpdateRecords failed reading rec code: " + toString(MaybeCode.takeError())); ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID, SourceLocation()); Reader.UpdateDecl(D, PendingLazySpecializationIDs); // We might have made this declaration interesting. If so, remember that // we need to hand it off to the consumer. if (!WasInteresting && isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) { PotentiallyInterestingDecls.push_back( InterestingDecl(D, Reader.hasPendingBody())); WasInteresting = true; } } } // Add the lazy specializations to the template. 
assert((PendingLazySpecializationIDs.empty() || isa(D) || isa(D) || isa(D)) && "Must not have pending specializations"); if (auto *CTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs); else if (auto *FTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs); else if (auto *VTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs); PendingLazySpecializationIDs.clear(); // Load the pending visible updates for this decl context, if it has any. auto I = PendingVisibleUpdates.find(ID); if (I != PendingVisibleUpdates.end()) { auto VisibleUpdates = std::move(I->second); PendingVisibleUpdates.erase(I); auto *DC = cast(D)->getPrimaryContext(); for (const auto &Update : VisibleUpdates) Lookups[DC].Table.add( Update.Mod, Update.Data, reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { // Attach FirstLocal to the end of the decl chain. Decl *CanonDecl = FirstLocal->getCanonicalDecl(); if (FirstLocal != CanonDecl) { Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl); ASTDeclReader::attachPreviousDecl( *this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl, CanonDecl); } if (!LocalOffset) { ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal); return; } // Load the list of other redeclarations from this module file. ModuleFile *M = getOwningModuleFile(FirstLocal); assert(M && "imported decl from no module file"); llvm::BitstreamCursor &Cursor = M->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(LocalOffset)) llvm::report_fatal_error( "ASTReader::loadPendingDeclChain failed jumping: " + toString(std::move(JumpFailed))); RecordData Record; Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( "ASTReader::loadPendingDeclChain failed reading code: " + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); if (Expected MaybeRecCode = Cursor.readRecord(Code, Record)) assert(MaybeRecCode.get() == LOCAL_REDECLARATIONS && "expected LOCAL_REDECLARATIONS record!"); else llvm::report_fatal_error( "ASTReader::loadPendingDeclChain failed reading rec code: " + toString(MaybeCode.takeError())); // FIXME: We have several different dispatches on decl kind here; maybe // we should instead generate one loop per kind and dispatch up-front? Decl *MostRecent = FirstLocal; for (unsigned I = 0, N = Record.size(); I != N; ++I) { auto *D = GetLocalDecl(*M, Record[N - I - 1]); ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl); MostRecent = D; } ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent); } namespace { /// Given an ObjC interface, goes through the modules and links to the /// interface all the categories for it. class ObjCCategoriesVisitor { ASTReader &Reader; ObjCInterfaceDecl *Interface; llvm::SmallPtrSetImpl &Deserialized; ObjCCategoryDecl *Tail = nullptr; llvm::DenseMap NameCategoryMap; serialization::GlobalDeclID InterfaceID; unsigned PreviousGeneration; void add(ObjCCategoryDecl *Cat) { // Only process each category once. if (!Deserialized.erase(Cat)) return; // Check for duplicate categories. 
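// Illustrative sketch (not from the LLVM tree): ObjCCategoriesVisitor::add
// below links each newly seen category onto the interface's category chain via
// a tail pointer and uses a name -> first-category map to flag duplicates. The
// same bookkeeping in a stand-alone form; all names are invented for the
// example.
#if 0
#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Category {
  std::string Name;
  Category *Next = nullptr;
};

struct CategoryChain {
  Category *Head = nullptr;
  Category *Tail = nullptr;
  std::map<std::string, Category *> ByName;
  std::vector<std::string> Duplicates;

  void add(Category *Cat) {
    if (!Cat->Name.empty()) {
      Category *&Existing = ByName[Cat->Name];
      if (Existing)
        Duplicates.push_back(Cat->Name); // would be diagnosed as a duplicate
      else
        Existing = Cat;
    }
    if (Tail)
      Tail->Next = Cat; // append to the end of the chain
    else
      Head = Cat;
    Tail = Cat;
  }
};

int main() {
  Category A{"A"}, B{"B"}, A2{"A"};
  CategoryChain Chain;
  Chain.add(&A);
  Chain.add(&B);
  Chain.add(&A2);
  assert(Chain.Head == &A && Chain.Tail == &A2);
  assert(Chain.Duplicates.size() == 1 && Chain.Duplicates[0] == "A");
}
#endif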
if (Cat->getDeclName()) { ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()]; if (Existing && Reader.getOwningModuleFile(Existing) != Reader.getOwningModuleFile(Cat)) { // FIXME: We should not warn for duplicates in diamond: // // MT // // / \ // // ML MR // // \ / // // MB // // // If there are duplicates in ML/MR, there will be warning when // creating MB *and* when importing MB. We should not warn when // importing. Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def) << Interface->getDeclName() << Cat->getDeclName(); Reader.Diag(Existing->getLocation(), diag::note_previous_definition); } else if (!Existing) { // Record this category. Existing = Cat; } } // Add this category to the end of the chain. if (Tail) ASTDeclReader::setNextObjCCategory(Tail, Cat); else Interface->setCategoryListRaw(Cat); Tail = Cat; } public: ObjCCategoriesVisitor(ASTReader &Reader, ObjCInterfaceDecl *Interface, llvm::SmallPtrSetImpl &Deserialized, serialization::GlobalDeclID InterfaceID, unsigned PreviousGeneration) : Reader(Reader), Interface(Interface), Deserialized(Deserialized), InterfaceID(InterfaceID), PreviousGeneration(PreviousGeneration) { // Populate the name -> category map with the set of known categories. for (auto *Cat : Interface->known_categories()) { if (Cat->getDeclName()) NameCategoryMap[Cat->getDeclName()] = Cat; // Keep track of the tail of the category list. Tail = Cat; } } bool operator()(ModuleFile &M) { // If we've loaded all of the category information we care about from // this module file, we're done. if (M.Generation <= PreviousGeneration) return true; // Map global ID of the definition down to the local ID used in this // module file. If there is no such mapping, we'll find nothing here // (or in any module it imports). DeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID); if (!LocalID) return true; // Perform a binary search to find the local redeclarations for this // declaration (if any). const ObjCCategoriesInfo Compare = { LocalID, 0 }; const ObjCCategoriesInfo *Result = std::lower_bound(M.ObjCCategoriesMap, M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap, Compare); if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap || Result->DefinitionID != LocalID) { // We didn't find anything. If the class definition is in this module // file, then the module files it depends on cannot have any categories, // so suppress further lookup. return Reader.isDeclIDFromModule(InterfaceID, M); } // We found something. Dig out all of the categories. unsigned Offset = Result->Offset; unsigned N = M.ObjCCategories[Offset]; M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again for (unsigned I = 0; I != N; ++I) add(cast_or_null( Reader.GetLocalDecl(M, M.ObjCCategories[Offset++]))); return true; } }; } // namespace void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID, ObjCInterfaceDecl *D, unsigned PreviousGeneration) { ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID, PreviousGeneration); ModuleMgr.visit(Visitor); } template static void forAllLaterRedecls(DeclT *D, Fn F) { F(D); // Check whether we've already merged D into its redeclaration chain. // MostRecent may or may not be nullptr if D has not been merged. If // not, walk the merged redecl chain and see if it's there. 
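// Illustrative sketch (not from the LLVM tree): forAllLaterRedecls below first
// checks whether D is reachable from the most recent declaration through the
// previous-declaration links, and only then applies the functor to every
// declaration that comes after D on that chain. The same walk over a plain
// linked structure; names are invented for the example.
#if 0
#include <cassert>
#include <vector>

struct Node {
  int Value;
  Node *Prev = nullptr; // older declaration in the chain
};

template <typename Fn> void forAllLaterNodes(Node *N, Node *MostRecent, Fn F) {
  F(N);
  // Is N part of the chain ending at MostRecent?
  bool Found = false;
  for (Node *Cur = MostRecent; Cur && !Found; Cur = Cur->Prev)
    Found = (Cur == N);
  // If so, visit everything later than N (between MostRecent and N).
  if (Found)
    for (Node *Cur = MostRecent; Cur != N; Cur = Cur->Prev)
      F(Cur);
}

int main() {
  Node A{1}, B{2}, C{3};
  B.Prev = &A;
  C.Prev = &B; // chain: C -> B -> A, with C the most recent
  std::vector<int> Seen;
  forAllLaterNodes(&A, &C, [&](Node *N) { Seen.push_back(N->Value); });
  assert((Seen == std::vector<int>{1, 3, 2})); // A itself, then C and B
}
#endif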
auto *MostRecent = D->getMostRecentDecl(); bool Found = false; for (auto *Redecl = MostRecent; Redecl && !Found; Redecl = Redecl->getPreviousDecl()) Found = (Redecl == D); // If this declaration is merged, apply the functor to all later decls. if (Found) { for (auto *Redecl = MostRecent; Redecl != D; Redecl = Redecl->getPreviousDecl()) F(Redecl); } } void ASTDeclReader::UpdateDecl(Decl *D, llvm::SmallVectorImpl &PendingLazySpecializationIDs) { while (Record.getIdx() < Record.size()) { switch ((DeclUpdateKind)Record.readInt()) { case UPD_CXX_ADDED_IMPLICIT_MEMBER: { auto *RD = cast(D); // FIXME: If we also have an update record for instantiating the // definition of D, we need that to happen before we get here. Decl *MD = Record.readDecl(); assert(MD && "couldn't read decl from update record"); // FIXME: We should call addHiddenDecl instead, to add the member // to its DeclContext. RD->addedMember(MD); break; } case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION: // It will be added to the template's lazy specialization set. PendingLazySpecializationIDs.push_back(readDeclID()); break; case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: { auto *Anon = readDeclAs(); // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. if (!Record.isModule()) { if (auto *TU = dyn_cast(D)) TU->setAnonymousNamespace(Anon); else cast(D)->setAnonymousNamespace(Anon); } break; } case UPD_CXX_ADDED_VAR_DEFINITION: { auto *VD = cast(D); VD->NonParmVarDeclBits.IsInline = Record.readInt(); VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt(); uint64_t Val = Record.readInt(); if (Val && !VD->getInit()) { VD->setInit(Record.readExpr()); if (Val > 1) { // IsInitKnownICE = 1, IsInitNotICE = 2, IsInitICE = 3 EvaluatedStmt *Eval = VD->ensureEvaluatedStmt(); Eval->CheckedICE = true; Eval->IsICE = Val == 3; } } break; } case UPD_CXX_POINT_OF_INSTANTIATION: { SourceLocation POI = Record.readSourceLocation(); if (auto *VTSD = dyn_cast(D)) { VTSD->setPointOfInstantiation(POI); } else if (auto *VD = dyn_cast(D)) { VD->getMemberSpecializationInfo()->setPointOfInstantiation(POI); } else { auto *FD = cast(D); if (auto *FTSInfo = FD->TemplateOrSpecialization .dyn_cast()) FTSInfo->setPointOfInstantiation(POI); else FD->TemplateOrSpecialization.get() ->setPointOfInstantiation(POI); } break; } case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: { auto *Param = cast(D); // We have to read the default argument regardless of whether we use it // so that hypothetical further update records aren't messed up. // TODO: Add a function to skip over the next expr record. auto *DefaultArg = Record.readExpr(); // Only apply the update if the parameter still has an uninstantiated // default argument. if (Param->hasUninstantiatedDefaultArg()) Param->setDefaultArg(DefaultArg); break; } case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: { auto *FD = cast(D); auto *DefaultInit = Record.readExpr(); // Only apply the update if the field still has an uninstantiated // default member initializer. if (FD->hasInClassInitializer() && !FD->getInClassInitializer()) { if (DefaultInit) FD->setInClassInitializer(DefaultInit); else // Instantiation failed. We can get here if we serialized an AST for // an invalid program. FD->removeInClassInitializer(); } break; } case UPD_CXX_ADDED_FUNCTION_DEFINITION: { auto *FD = cast(D); if (Reader.PendingBodies[FD]) { // FIXME: Maybe check for ODR violations. // It's safe to stop now because this update record is always last. 
return; } if (Record.readInt()) { // Maintain AST consistency: any later redeclarations of this function // are inline if this one is. (We might have merged another declaration // into this one.) forAllLaterRedecls(FD, [](FunctionDecl *FD) { FD->setImplicitlyInline(); }); } FD->setInnerLocStart(readSourceLocation()); ReadFunctionDefinition(FD); assert(Record.getIdx() == Record.size() && "lazy body must be last"); break; } case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast(D); auto *OldDD = RD->getCanonicalDecl()->DefinitionData; bool HadRealDefinition = OldDD && (OldDD->Definition != RD || !Reader.PendingFakeDefinitionData.count(OldDD)); RD->setParamDestroyedInCallee(Record.readInt()); RD->setArgPassingRestrictions( (RecordDecl::ArgPassingKind)Record.readInt()); ReadCXXRecordDefinition(RD, /*Update*/true); // Visible update is handled separately. uint64_t LexicalOffset = ReadLocalOffset(); if (!HadRealDefinition && LexicalOffset) { Record.readLexicalDeclContextStorage(LexicalOffset, RD); Reader.PendingFakeDefinitionData.erase(OldDD); } auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); if (MemberSpecializationInfo *MSInfo = RD->getMemberSpecializationInfo()) { MSInfo->setTemplateSpecializationKind(TSK); MSInfo->setPointOfInstantiation(POI); } else { auto *Spec = cast(RD); Spec->setTemplateSpecializationKind(TSK); Spec->setPointOfInstantiation(POI); if (Record.readInt()) { auto *PartialSpec = readDeclAs(); SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); auto *TemplArgList = TemplateArgumentList::CreateCopy( Reader.getContext(), TemplArgs); // FIXME: If we already have a partial specialization set, // check that it matches. if (!Spec->getSpecializedTemplateOrPartial() .is()) Spec->setInstantiationOf(PartialSpec, TemplArgList); } } RD->setTagKind((TagTypeKind)Record.readInt()); RD->setLocation(readSourceLocation()); RD->setLocStart(readSourceLocation()); RD->setBraceRange(readSourceRange()); if (Record.readInt()) { AttrVec Attrs; Record.readAttributes(Attrs); // If the declaration already has attributes, we assume that some other // AST file already loaded them. if (!D->hasAttrs()) D->setAttrsImpl(Attrs, Reader.getContext()); } break; } case UPD_CXX_RESOLVED_DTOR_DELETE: { // Set the 'operator delete' directly to avoid emitting another update // record. auto *Del = readDeclAs(); auto *First = cast(D->getCanonicalDecl()); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!First->OperatorDelete) { First->OperatorDelete = Del; First->OperatorDeleteThisArg = ThisArg; } break; } case UPD_CXX_RESOLVED_EXCEPTION_SPEC: { SmallVector ExceptionStorage; auto ESI = Record.readExceptionSpecInfo(ExceptionStorage); // Update this declaration's exception specification, if needed. auto *FD = cast(D); auto *FPT = FD->getType()->castAs(); // FIXME: If the exception specification is already present, check that it // matches. if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) { FD->setType(Reader.getContext().getFunctionType( FPT->getReturnType(), FPT->getParamTypes(), FPT->getExtProtoInfo().withExceptionSpec(ESI))); // When we get to the end of deserializing, see if there are other decls // that we need to propagate this exception specification onto. 
Reader.PendingExceptionSpecUpdates.insert( std::make_pair(FD->getCanonicalDecl(), FD)); } break; } case UPD_CXX_DEDUCED_RETURN_TYPE: { auto *FD = cast(D); QualType DeducedResultType = Record.readType(); Reader.PendingDeducedTypeUpdates.insert( {FD->getCanonicalDecl(), DeducedResultType}); break; } case UPD_DECL_MARKED_USED: // Maintain AST consistency: any later redeclarations are used too. D->markUsed(Reader.getContext()); break; case UPD_MANGLING_NUMBER: Reader.getContext().setManglingNumber(cast(D), Record.readInt()); break; case UPD_STATIC_LOCAL_NUMBER: Reader.getContext().setStaticLocalNumber(cast(D), Record.readInt()); break; case UPD_DECL_MARKED_OPENMP_THREADPRIVATE: D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit( Reader.getContext(), readSourceRange(), AttributeCommonInfo::AS_Pragma)); break; case UPD_DECL_MARKED_OPENMP_ALLOCATE: { auto AllocatorKind = static_cast(Record.readInt()); Expr *Allocator = Record.readExpr(); SourceRange SR = readSourceRange(); D->addAttr(OMPAllocateDeclAttr::CreateImplicit( Reader.getContext(), AllocatorKind, Allocator, SR, AttributeCommonInfo::AS_Pragma)); break; } case UPD_DECL_EXPORTED: { unsigned SubmoduleID = readSubmoduleID(); auto *Exported = cast(D); Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr; Reader.getContext().mergeDefinitionIntoModule(Exported, Owner); Reader.PendingMergedDefinitionsToDeduplicate.insert(Exported); break; } case UPD_DECL_MARKED_OPENMP_DECLARETARGET: { OMPDeclareTargetDeclAttr::MapTypeTy MapType = static_cast(Record.readInt()); OMPDeclareTargetDeclAttr::DevTypeTy DevType = static_cast(Record.readInt()); D->addAttr(OMPDeclareTargetDeclAttr::CreateImplicit( Reader.getContext(), MapType, DevType, readSourceRange(), AttributeCommonInfo::AS_Pragma)); break; } case UPD_ADDED_ATTR_TO_RECORD: AttrVec Attrs; Record.readAttributes(Attrs); assert(Attrs.size() == 1); D->addAttr(Attrs[0]); break; } } } diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPort.h b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPort.h index 20cf5d660c6a..4493dd512ff0 100644 --- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPort.h +++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPort.h @@ -1,144 +1,144 @@ /*===- InstrProfilingPort.h- Support library for PGO instrumentation ------===*\ |* |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |* See https://llvm.org/LICENSE.txt for license information. |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |* \*===----------------------------------------------------------------------===*/ /* This header must be included after all others so it can provide fallback definitions for stuff missing in system headers. */ #ifndef PROFILE_INSTRPROFILING_PORT_H_ #define PROFILE_INSTRPROFILING_PORT_H_ #ifdef _MSC_VER #define COMPILER_RT_ALIGNAS(x) __declspec(align(x)) #define COMPILER_RT_VISIBILITY /* FIXME: selectany does not have the same semantics as weak. 
*/
#define COMPILER_RT_WEAK __declspec(selectany)
/* Need to include */
#define COMPILER_RT_ALLOCA _alloca
/* Need to include and */
#define COMPILER_RT_FTRUNCATE(f,l) _chsize(_fileno(f),l)
#define COMPILER_RT_ALWAYS_INLINE __forceinline
#define COMPILER_RT_CLEANUP(x)
#elif __GNUC__
#define COMPILER_RT_ALIGNAS(x) __attribute__((aligned(x)))
#define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
#define COMPILER_RT_WEAK __attribute__((weak))
#define COMPILER_RT_ALLOCA __builtin_alloca
#define COMPILER_RT_FTRUNCATE(f,l) ftruncate(fileno(f),l)
#define COMPILER_RT_ALWAYS_INLINE inline __attribute((always_inline))
#define COMPILER_RT_CLEANUP(x) __attribute__((cleanup(x)))
#endif
#if defined(__APPLE__)
#define COMPILER_RT_SEG "__DATA,"
#else
#define COMPILER_RT_SEG ""
#endif
#ifdef _MSC_VER
#define COMPILER_RT_SECTION(Sect) __declspec(allocate(Sect))
#else
#define COMPILER_RT_SECTION(Sect) __attribute__((section(Sect)))
#endif
#define COMPILER_RT_MAX_HOSTLEN 128
#ifdef __ORBIS__
#define COMPILER_RT_GETHOSTNAME(Name, Len) ((void)(Name), (void)(Len), (-1))
#else
#define COMPILER_RT_GETHOSTNAME(Name, Len) lprofGetHostName(Name, Len)
#endif
#if COMPILER_RT_HAS_ATOMICS == 1
-#ifdef _MSC_VER
+#ifdef _WIN32
#include <windows.h>
-#if _MSC_VER < 1900
+#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
#if defined(_WIN64)
#define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV) \
  (InterlockedCompareExchange64((LONGLONG volatile *)Ptr, (LONGLONG)NewV, \
                                (LONGLONG)OldV) == (LONGLONG)OldV)
#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr) \
  (DomType *)InterlockedExchangeAdd64((LONGLONG volatile *)&PtrVar, \
                                      (LONGLONG)sizeof(DomType) * PtrIncr)
#else /* !defined(_WIN64) */
#define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV) \
  (InterlockedCompareExchange((LONG volatile *)Ptr, (LONG)NewV, (LONG)OldV) == \
   (LONG)OldV)
#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr) \
  (DomType *)InterlockedExchangeAdd((LONG volatile *)&PtrVar, \
                                    (LONG)sizeof(DomType) * PtrIncr)
#endif
-#else /* !defined(_MSC_VER) */
+#else /* !defined(_WIN32) */
#define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV) \
  __sync_bool_compare_and_swap(Ptr, OldV, NewV)
#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr) \
  (DomType *)__sync_fetch_and_add((long *)&PtrVar, sizeof(DomType) * PtrIncr)
#endif
#else /* COMPILER_RT_HAS_ATOMICS != 1 */
#include "InstrProfilingUtil.h"
#define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV) \
  lprofBoolCmpXchg((void **)Ptr, OldV, NewV)
#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr) \
  (DomType *)lprofPtrFetchAdd((void **)&PtrVar, sizeof(DomType) * PtrIncr)
#endif
#if defined(_WIN32)
#define DIR_SEPARATOR '\\'
#define DIR_SEPARATOR_2 '/'
#else
#define DIR_SEPARATOR '/'
#endif
#ifndef DIR_SEPARATOR_2
#define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
#else /* DIR_SEPARATOR_2 */
#define IS_DIR_SEPARATOR(ch) \
  (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
#endif /* DIR_SEPARATOR_2 */
#if defined(_WIN32)
#include <windows.h>
static inline size_t getpagesize() {
  SYSTEM_INFO S;
  GetNativeSystemInfo(&S);
  return S.dwPageSize;
}
#else /* defined(_WIN32) */
#include <unistd.h>
#endif /* defined(_WIN32) */
#define PROF_ERR(Format, ...) \
  fprintf(stderr, "LLVM Profile Error: " Format, __VA_ARGS__);
#define PROF_WARN(Format, ...) \
  fprintf(stderr, "LLVM Profile Warning: " Format, __VA_ARGS__);
#define PROF_NOTE(Format, ...)
\ fprintf(stderr, "LLVM Profile Note: " Format, __VA_ARGS__); #ifndef MAP_FILE #define MAP_FILE 0 #endif #ifndef O_BINARY #define O_BINARY 0 #endif #if defined(__FreeBSD__) #include #include #else /* defined(__FreeBSD__) */ #include #include #endif /* defined(__FreeBSD__) && defined(__i386__) */ #endif /* PROFILE_INSTRPROFILING_PORT_H_ */ diff --git a/contrib/llvm-project/lld/ELF/InputSection.cpp b/contrib/llvm-project/lld/ELF/InputSection.cpp index 7a7ebd974909..a6c97a3506ba 100644 --- a/contrib/llvm-project/lld/ELF/InputSection.cpp +++ b/contrib/llvm-project/lld/ELF/InputSection.cpp @@ -1,1446 +1,1449 @@ //===- InputSection.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "InputSection.h" #include "Config.h" #include "EhFrame.h" #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" #include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Threading.h" #include "llvm/Support/xxhash.h" #include #include #include #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace llvm::sys; using namespace lld; using namespace lld::elf; std::vector elf::inputSections; DenseSet> elf::ppc64noTocRelax; // Returns a string to construct an error message. std::string lld::toString(const InputSectionBase *sec) { return (toString(sec->file) + ":(" + sec->name + ")").str(); } template static ArrayRef getSectionContents(ObjFile &file, const typename ELFT::Shdr &hdr) { if (hdr.sh_type == SHT_NOBITS) return makeArrayRef(nullptr, hdr.sh_size); return check(file.getObj().getSectionContents(&hdr)); } InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, uint64_t entsize, uint32_t link, uint32_t info, uint32_t alignment, ArrayRef data, StringRef name, Kind sectionKind) : SectionBase(sectionKind, name, flags, entsize, alignment, type, info, link), file(file), rawData(data) { // In order to reduce memory allocation, we assume that mergeable // sections are smaller than 4 GiB, which is not an unreasonable // assumption as of 2017. if (sectionKind == SectionBase::Merge && rawData.size() > UINT32_MAX) error(toString(this) + ": section too large"); numRelocations = 0; areRelocsRela = false; // The ELF spec states that a value of 0 means the section has // no alignment constraints. uint32_t v = std::max(alignment, 1); if (!isPowerOf2_64(v)) fatal(toString(this) + ": sh_addralign is not a power of 2"); this->alignment = v; // In ELF, each section can be compressed by zlib, and if compressed, // section name may be mangled by appending "z" (e.g. ".zdebug_info"). // If that's the case, demangle section name so that we can handle a // section as if it weren't compressed. 
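// Illustrative sketch (not from the LLVM tree): the old-style ".zdebug_*"
// sections that parseCompressedHeader strips further down begin with the
// 4-byte magic "ZLIB" followed by the uncompressed size as a 64-bit big-endian
// integer, and only then the zlib stream. A stand-alone parser for just that
// header layout; names are invented for the example.
#if 0
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

bool parseZdebugHeader(const std::vector<uint8_t> &Data,
                       uint64_t &UncompressedSize, size_t &PayloadOffset) {
  if (Data.size() < 12 || std::memcmp(Data.data(), "ZLIB", 4) != 0)
    return false; // corrupted compressed section header
  uint64_t Size = 0;
  for (int I = 0; I < 8; ++I)
    Size = (Size << 8) | Data[4 + I]; // size is stored big-endian
  UncompressedSize = Size;
  PayloadOffset = 12; // zlib data starts right after the header
  return true;
}

int main() {
  std::vector<uint8_t> Sec = {'Z', 'L', 'I', 'B', 0, 0, 0, 0, 0, 0, 1, 0, 0xAB};
  uint64_t Size;
  size_t Off;
  assert(parseZdebugHeader(Sec, Size, Off));
  assert(Size == 256 && Off == 12);
}
#endif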
if ((flags & SHF_COMPRESSED) || name.startswith(".zdebug")) { if (!zlib::isAvailable()) error(toString(file) + ": contains a compressed section, " + "but zlib is not available"); parseCompressedHeader(); } } // Drop SHF_GROUP bit unless we are producing a re-linkable object file. // SHF_GROUP is a marker that a section belongs to some comdat group. // That flag doesn't make sense in an executable. static uint64_t getFlags(uint64_t flags) { flags &= ~(uint64_t)SHF_INFO_LINK; if (!config->relocatable) flags &= ~(uint64_t)SHF_GROUP; return flags; } // GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of // March 2017) fail to infer section types for sections starting with // ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of // SHF_INIT_ARRAY. As a result, the following assembler directive // creates ".init_array.100" with SHT_PROGBITS, for example. // // .section .init_array.100, "aw" // // This function forces SHT_{INIT,FINI}_ARRAY so that we can handle // incorrect inputs as if they were correct from the beginning. static uint64_t getType(uint64_t type, StringRef name) { if (type == SHT_PROGBITS && name.startswith(".init_array.")) return SHT_INIT_ARRAY; if (type == SHT_PROGBITS && name.startswith(".fini_array.")) return SHT_FINI_ARRAY; return type; } template InputSectionBase::InputSectionBase(ObjFile &file, const typename ELFT::Shdr &hdr, StringRef name, Kind sectionKind) : InputSectionBase(&file, getFlags(hdr.sh_flags), getType(hdr.sh_type, name), hdr.sh_entsize, hdr.sh_link, hdr.sh_info, hdr.sh_addralign, getSectionContents(file, hdr), name, sectionKind) { // We reject object files having insanely large alignments even though // they are allowed by the spec. I think 4GB is a reasonable limitation. // We might want to relax this in the future. if (hdr.sh_addralign > UINT32_MAX) fatal(toString(&file) + ": section sh_addralign is too large"); } size_t InputSectionBase::getSize() const { if (auto *s = dyn_cast(this)) return s->getSize(); if (uncompressedSize >= 0) return uncompressedSize; return rawData.size() - bytesDropped; } void InputSectionBase::uncompress() const { size_t size = uncompressedSize; char *uncompressedBuf; { static std::mutex mu; std::lock_guard lock(mu); uncompressedBuf = bAlloc.Allocate(size); } if (Error e = zlib::uncompress(toStringRef(rawData), uncompressedBuf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); rawData = makeArrayRef((uint8_t *)uncompressedBuf, size); uncompressedSize = -1; } uint64_t InputSectionBase::getOffsetInFile() const { const uint8_t *fileStart = (const uint8_t *)file->mb.getBufferStart(); const uint8_t *secStart = data().begin(); return secStart - fileStart; } uint64_t SectionBase::getOffset(uint64_t offset) const { switch (kind()) { case Output: { auto *os = cast(this); // For output sections we treat offset -1 as the end of the section. return offset == uint64_t(-1) ? os->size : offset; } case Regular: case Synthetic: return cast(this)->getOffset(offset); case EHFrame: // The file crtbeginT.o has relocations pointing to the start of an empty // .eh_frame that is known to be the first in the link. It does that to // identify the start of the output .eh_frame. 
return offset; case Merge: const MergeInputSection *ms = cast(this); if (InputSection *isec = ms->getParent()) return isec->getOffset(ms->getParentOffset(offset)); return ms->getParentOffset(offset); } llvm_unreachable("invalid section kind"); } uint64_t SectionBase::getVA(uint64_t offset) const { const OutputSection *out = getOutputSection(); return (out ? out->addr : 0) + getOffset(offset); } OutputSection *SectionBase::getOutputSection() { InputSection *sec; if (auto *isec = dyn_cast(this)) sec = isec; else if (auto *ms = dyn_cast(this)) sec = ms->getParent(); else if (auto *eh = dyn_cast(this)) sec = eh->getParent(); else return cast(this); return sec ? sec->getParent() : nullptr; } // When a section is compressed, `rawData` consists with a header followed // by zlib-compressed data. This function parses a header to initialize // `uncompressedSize` member and remove the header from `rawData`. void InputSectionBase::parseCompressedHeader() { using Chdr64 = typename ELF64LE::Chdr; using Chdr32 = typename ELF32LE::Chdr; // Old-style header if (name.startswith(".zdebug")) { if (!toStringRef(rawData).startswith("ZLIB")) { error(toString(this) + ": corrupted compressed section header"); return; } rawData = rawData.slice(4); if (rawData.size() < 8) { error(toString(this) + ": corrupted compressed section header"); return; } uncompressedSize = read64be(rawData.data()); rawData = rawData.slice(8); // Restore the original section name. // (e.g. ".zdebug_info" -> ".debug_info") name = saver.save("." + name.substr(2)); return; } assert(flags & SHF_COMPRESSED); flags &= ~(uint64_t)SHF_COMPRESSED; // New-style 64-bit header if (config->is64) { if (rawData.size() < sizeof(Chdr64)) { error(toString(this) + ": corrupted compressed section"); return; } auto *hdr = reinterpret_cast(rawData.data()); if (hdr->ch_type != ELFCOMPRESS_ZLIB) { error(toString(this) + ": unsupported compression type"); return; } uncompressedSize = hdr->ch_size; alignment = std::max(hdr->ch_addralign, 1); rawData = rawData.slice(sizeof(*hdr)); return; } // New-style 32-bit header if (rawData.size() < sizeof(Chdr32)) { error(toString(this) + ": corrupted compressed section"); return; } auto *hdr = reinterpret_cast(rawData.data()); if (hdr->ch_type != ELFCOMPRESS_ZLIB) { error(toString(this) + ": unsupported compression type"); return; } uncompressedSize = hdr->ch_size; alignment = std::max(hdr->ch_addralign, 1); rawData = rawData.slice(sizeof(*hdr)); } InputSection *InputSectionBase::getLinkOrderDep() const { assert(link); assert(flags & SHF_LINK_ORDER); return cast(file->getSections()[link]); } // Find a function symbol that encloses a given location. template Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) { for (Symbol *b : file->getSymbols()) if (Defined *d = dyn_cast(b)) if (d->section == this && d->type == STT_FUNC && d->value <= offset && offset < d->value + d->size) return d; return nullptr; } // Returns a source location string. Used to construct an error message. template std::string InputSectionBase::getLocation(uint64_t offset) { std::string secAndOffset = (name + "+0x" + utohexstr(offset)).str(); // We don't have file for synthetic sections. if (getFile() == nullptr) return (config->outputFile + ":(" + secAndOffset + ")") .str(); // First check if we can get desired values from debugging information. 
if (Optional info = getFile()->getDILineInfo(this, offset)) return info->FileName + ":" + std::to_string(info->Line) + ":(" + secAndOffset + ")"; // File->sourceFile contains STT_FILE symbol that contains a // source file name. If it's missing, we use an object file name. std::string srcFile = std::string(getFile()->sourceFile); if (srcFile.empty()) srcFile = toString(file); if (Defined *d = getEnclosingFunction(offset)) return srcFile + ":(function " + toString(*d) + ": " + secAndOffset + ")"; // If there's no symbol, print out the offset in the section. return (srcFile + ":(" + secAndOffset + ")"); } // This function is intended to be used for constructing an error message. // The returned message looks like this: // // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) // // Returns an empty string if there's no way to get line info. std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { return file->getSrcMsg(sym, *this, offset); } // Returns a filename string along with an optional section name. This // function is intended to be used for constructing an error // message. The returned message looks like this: // // path/to/foo.o:(function bar) // // or // // path/to/foo.o:(function bar) in archive path/to/bar.a std::string InputSectionBase::getObjMsg(uint64_t off) { std::string filename = std::string(file->getName()); std::string archive; if (!file->archiveName.empty()) archive = " in archive " + file->archiveName; // Find a symbol that encloses a given location. for (Symbol *b : file->getSymbols()) if (auto *d = dyn_cast(b)) if (d->section == this && d->value <= off && off < d->value + d->size) return filename + ":(" + toString(*d) + ")" + archive; // If there's no symbol, print out the offset in the section. return (filename + ":(" + name + "+0x" + utohexstr(off) + ")" + archive) .str(); } InputSection InputSection::discarded(nullptr, 0, 0, 0, ArrayRef(), ""); InputSection::InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t alignment, ArrayRef data, StringRef name, Kind k) : InputSectionBase(f, flags, type, /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, alignment, data, name, k) {} template InputSection::InputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name) : InputSectionBase(f, header, name, InputSectionBase::Regular) {} bool InputSection::classof(const SectionBase *s) { return s->kind() == SectionBase::Regular || s->kind() == SectionBase::Synthetic; } OutputSection *InputSection::getParent() const { return cast_or_null(parent); } // Copy SHT_GROUP section contents. Used only for the -r option. template void InputSection::copyShtGroup(uint8_t *buf) { // ELFT::Word is the 32-bit integral type in the target endianness. using u32 = typename ELFT::Word; ArrayRef from = getDataAs(); auto *to = reinterpret_cast(buf); // The first entry is not a section number but a flag. *to++ = from[0]; // Adjust section numbers because section numbers in an input object // files are different in the output. ArrayRef sections = file->getSections(); for (uint32_t idx : from.slice(1)) *to++ = sections[idx]->getOutputSection()->sectionIndex; } InputSectionBase *InputSection::getRelocatedSection() const { if (!file || (type != SHT_RELA && type != SHT_REL)) return nullptr; ArrayRef sections = file->getSections(); return sections[info]; } // This is used for -r and --emit-relocs. We can't use memcpy to copy // relocations because we need to update symbol table offset and section index // for each relocation. So we copy relocations one by one. 
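// Illustrative sketch (not from the LLVM tree): copyRelocations below cannot
// simply memcpy the relocation records because the output uses a new symbol
// table and new section addresses, so each entry's symbol index and offset are
// rewritten one by one. A minimal stand-alone version of that copy-with-remap
// step; the struct and map here are invented for the example.
#if 0
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Rel {
  uint64_t Offset;   // r_offset
  unsigned SymIndex; // symbol table index
};

std::vector<Rel> copyRels(const std::vector<Rel> &In,
                          const std::unordered_map<unsigned, unsigned> &SymMap,
                          uint64_t OutSecVA) {
  std::vector<Rel> Out;
  Out.reserve(In.size());
  for (const Rel &R : In)
    Out.push_back({OutSecVA + R.Offset, SymMap.at(R.SymIndex)});
  return Out;
}

int main() {
  std::vector<Rel> In = {{0x10, 3}, {0x20, 7}};
  std::unordered_map<unsigned, unsigned> Map = {{3, 1}, {7, 2}};
  auto Out = copyRels(In, Map, 0x1000);
  assert(Out[0].Offset == 0x1010 && Out[0].SymIndex == 1);
  assert(Out[1].Offset == 0x1020 && Out[1].SymIndex == 2);
}
#endif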
template void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { InputSectionBase *sec = getRelocatedSection(); for (const RelTy &rel : rels) { RelType type = rel.getType(config->isMips64EL); const ObjFile *file = getFile(); Symbol &sym = file->getRelocTargetSym(rel); auto *p = reinterpret_cast(buf); buf += sizeof(RelTy); if (RelTy::IsRela) p->r_addend = getAddend(rel); // Output section VA is zero for -r, so r_offset is an offset within the // section, but for --emit-relocs it is a virtual address. p->r_offset = sec->getVA(rel.r_offset); p->setSymbolAndType(in.symTab->getSymbolIndex(&sym), type, config->isMips64EL); if (sym.type == STT_SECTION) { // We combine multiple section symbols into only one per // section. This means we have to update the addend. That is // trivial for Elf_Rela, but for Elf_Rel we have to write to the // section data. We do that by adding to the Relocation vector. // .eh_frame is horribly special and can reference discarded sections. To // avoid having to parse and recreate .eh_frame, we just replace any // relocation in it pointing to discarded sections with R_*_NONE, which // hopefully creates a frame that is ignored at runtime. Also, don't warn // on .gcc_except_table and debug sections. // // See the comment in maybeReportUndefined for PPC32 .got2 and PPC64 .toc auto *d = dyn_cast(&sym); if (!d) { if (!isDebugSection(*sec) && sec->name != ".eh_frame" && sec->name != ".gcc_except_table" && sec->name != ".got2" && sec->name != ".toc") { uint32_t secIdx = cast(sym).discardedSecIdx; Elf_Shdr_Impl sec = CHECK(file->getObj().sections(), file)[secIdx]; warn("relocation refers to a discarded section: " + CHECK(file->getObj().getSectionName(&sec), file) + "\n>>> referenced by " + getObjMsg(p->r_offset)); } p->setSymbolAndType(0, 0, false); continue; } SectionBase *section = d->section->repl; if (!section->isLive()) { p->setSymbolAndType(0, 0, false); continue; } int64_t addend = getAddend(rel); const uint8_t *bufLoc = sec->data().begin() + rel.r_offset; if (!RelTy::IsRela) addend = target->getImplicitAddend(bufLoc, type); if (config->emachine == EM_MIPS && target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. But // relocatable files produced by a compiler or a linker // might redefine this default value and we must use it // for a calculation of the relocation result. When we // generate EXE or DSO it's trivial. Generating a relocatable // output is more difficult case because the linker does // not calculate relocations in this mode and loses // individual "gp" values used by each input object file. // As a workaround we add the "gp" value to the relocation // addend and save it back to the file. addend += sec->getFile()->mipsGp0; } if (RelTy::IsRela) p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; else if (config->relocatable && type != target->noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && p->r_addend >= 0x8000) { // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 // indicates that r30 is relative to the input section .got2 // (r_addend>=0x8000), after linking, r30 should be relative to the output // section .got2 . To compensate for the shift, adjust r_addend by // ppc32Got2OutSecOff. 
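// Illustrative sketch (not from the LLVM tree): the adjustment applied just
// below shifts an addend that was relative to the start of the input .got2
// section so that it becomes relative to the combined output .got2 section.
// The arithmetic, with invented names and example numbers.
#if 0
#include <cassert>
#include <cstdint>

int64_t rebaseAddend(int64_t Addend, uint64_t InputSecOffsetInOutput) {
  // The input section now lives InputSecOffsetInOutput bytes into the output
  // section, so anything measured from the input section start moves by that
  // much.
  return Addend + static_cast<int64_t>(InputSecOffsetInOutput);
}

int main() {
  // Input .got2 placed 0x400 bytes into the output .got2 section.
  assert(rebaseAddend(0x8000, 0x400) == 0x8400);
}
#endif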
p->r_addend += sec->file->ppc32Got2OutSecOff; } } } // The ARM and AArch64 ABI handle pc-relative relocations to undefined weak // references specially. The general rule is that the value of the symbol in // this context is the address of the place P. A further special case is that // branch relocations to an undefined weak reference resolve to the next // instruction. static uint32_t getARMUndefinedRelativeWeakVA(RelType type, uint32_t a, uint32_t p) { switch (type) { // Unresolved branch relocations to weak references resolve to next // instruction, this will be either 2 or 4 bytes on from P. case R_ARM_THM_JUMP11: return p + 2 + a; case R_ARM_CALL: case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: case R_ARM_PREL31: case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: return p + 4 + a; case R_ARM_THM_CALL: // We don't want an interworking BLX to ARM return p + 5 + a; // Unresolved non branch pc-relative relocations // R_ARM_TARGET2 which can be resolved relatively is not present as it never // targets a weak-reference. case R_ARM_MOVW_PREL_NC: case R_ARM_MOVT_PREL: case R_ARM_REL32: case R_ARM_THM_ALU_PREL_11_0: case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: case R_ARM_THM_PC12: return p + a; // p + a is unrepresentable as negative immediates can't be encoded. case R_ARM_THM_PC8: return p; } llvm_unreachable("ARM pc-relative relocation expected\n"); } // The comment above getARMUndefinedRelativeWeakVA applies to this function. static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t type, uint64_t a, uint64_t p) { switch (type) { // Unresolved branch relocations to weak references resolve to next // instruction, this is 4 bytes on from P. case R_AARCH64_CALL26: case R_AARCH64_CONDBR19: case R_AARCH64_JUMP26: case R_AARCH64_TSTBR14: return p + 4 + a; // Unresolved non branch pc-relative relocations case R_AARCH64_PREL16: case R_AARCH64_PREL32: case R_AARCH64_PREL64: case R_AARCH64_ADR_PREL_LO21: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_PLT32: return p + a; } llvm_unreachable("AArch64 pc-relative relocation expected\n"); } // ARM SBREL relocations are of the form S + A - B where B is the static base // The ARM ABI defines base to be "addressing origin of the output segment // defining the symbol S". We defined the "addressing origin"/static base to be // the base of the PT_LOAD segment containing the Sym. // The procedure call standard only defines a Read Write Position Independent // RWPI variant so in practice we should expect the static base to be the base // of the RW segment. static uint64_t getARMStaticBase(const Symbol &sym) { OutputSection *os = sym.getOutputSection(); if (!os || !os->ptLoad || !os->ptLoad->firstSec) fatal("SBREL relocation to " + sym.getName() + " without static base"); return os->ptLoad->firstSec->addr; } // For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually // points the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA // is calculated using PCREL_HI20's symbol. // // This function returns the R_RISCV_PCREL_HI20 relocation from // R_RISCV_PCREL_LO12's symbol and addend. 
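// Worked example (illustrative, not part of the original patch): for an
// AArch64 BL against an undefined weak symbol at place P = 0x1000 with
// addend A = 0, getAArch64UndefinedRelativeWeakVA() above yields P + 4, so
// the final PC-relative value written for the relocation is +4 and the call
// simply falls through to the next instruction instead of branching to 0.
static uint64_t exampleUndefWeakBranchDelta() {
  const uint64_t a = 0, p = 0x1000;
  uint64_t dest = getAArch64UndefinedRelativeWeakVA(R_AARCH64_CALL26, a, p);
  return dest - p; // 4
}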
static Relocation *getRISCVPCRelHi20(const Symbol *sym, uint64_t addend) { const Defined *d = cast(sym); if (!d->section) { error("R_RISCV_PCREL_LO12 relocation points to an absolute symbol: " + sym->getName()); return nullptr; } InputSection *isec = cast(d->section); if (addend != 0) warn("Non-zero addend in R_RISCV_PCREL_LO12 relocation to " + isec->getObjMsg(d->value) + " is ignored"); // Relocations are sorted by offset, so we can use std::equal_range to do // binary search. Relocation r; r.offset = d->value; auto range = std::equal_range(isec->relocations.begin(), isec->relocations.end(), r, [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; }); for (auto it = range.first; it != range.second; ++it) if (it->type == R_RISCV_PCREL_HI20 || it->type == R_RISCV_GOT_HI20 || it->type == R_RISCV_TLS_GD_HI20 || it->type == R_RISCV_TLS_GOT_HI20) return &*it; error("R_RISCV_PCREL_LO12 relocation points to " + isec->getObjMsg(d->value) + " without an associated R_RISCV_PCREL_HI20 relocation"); return nullptr; } // A TLS symbol's virtual address is relative to the TLS segment. Add a // target-specific adjustment to produce a thread-pointer-relative offset. static int64_t getTlsTpOffset(const Symbol &s) { // On targets that support TLSDESC, _TLS_MODULE_BASE_@tpoff = 0. if (&s == ElfSym::tlsModuleBase) return 0; // There are 2 TLS layouts. Among targets we support, x86 uses TLS Variant 2 // while most others use Variant 1. At run time TP will be aligned to p_align. // Variant 1. TP will be followed by an optional gap (which is the size of 2 // pointers on ARM/AArch64, 0 on other targets), followed by alignment // padding, then the static TLS blocks. The alignment padding is added so that // (TP + gap + padding) is congruent to p_vaddr modulo p_align. // // Variant 2. Static TLS blocks, followed by alignment padding are placed // before TP. The alignment padding is added so that (TP - padding - // p_memsz) is congruent to p_vaddr modulo p_align. PhdrEntry *tls = Out::tlsPhdr; switch (config->emachine) { // Variant 1. case EM_ARM: case EM_AARCH64: return s.getVA(0) + config->wordsize * 2 + ((tls->p_vaddr - config->wordsize * 2) & (tls->p_align - 1)); case EM_MIPS: case EM_PPC: case EM_PPC64: // Adjusted Variant 1. TP is placed with a displacement of 0x7000, which is // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library // data and 0xf000 of the program's TLS segment. return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; case EM_RISCV: return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); // Variant 2. 
case EM_HEXAGON: case EM_SPARCV9: case EM_386: case EM_X86_64: return s.getVA(0) - tls->p_memsz - ((-tls->p_vaddr - tls->p_memsz) & (tls->p_align - 1)); default: llvm_unreachable("unhandled Config->EMachine"); } } uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, int64_t a, uint64_t p, const Symbol &sym, RelExpr expr) { switch (expr) { case R_ABS: case R_DTPREL: case R_RELAX_TLS_LD_TO_LE_ABS: case R_RELAX_GOT_PC_NOPIC: case R_RISCV_ADD: return sym.getVA(a); case R_ADDEND: return a; case R_ARM_SBREL: return sym.getVA(a) - getARMStaticBase(sym); case R_GOT: case R_RELAX_TLS_GD_TO_IE_ABS: return sym.getGotVA() + a; case R_GOTONLY_PC: return in.got->getVA() + a - p; case R_GOTPLTONLY_PC: return in.gotPlt->getVA() + a - p; case R_GOTREL: case R_PPC64_RELAX_TOC: return sym.getVA(a) - in.got->getVA(); case R_GOTPLTREL: return sym.getVA(a) - in.gotPlt->getVA(); case R_GOTPLT: case R_RELAX_TLS_GD_TO_IE_GOTPLT: return sym.getGotVA() + a - in.gotPlt->getVA(); case R_TLSLD_GOT_OFF: case R_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: return sym.getGotOffset() + a; case R_AARCH64_GOT_PAGE_PC: case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: return getAArch64Page(sym.getGotVA() + a) - getAArch64Page(p); case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return sym.getGotVA() + a - p; case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: return in.mipsGot->getGp(file) + a; case R_MIPS_GOT_GP_PC: { // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iif the target // is _gp_disp symbol. In that case we should use the following // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf // microMIPS variants of these relocations use slightly different // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi() // to correctly handle less-significant bit of the microMIPS symbol. uint64_t v = in.mipsGot->getGp(file) + a - p; if (type == R_MIPS_LO16 || type == R_MICROMIPS_LO16) v += 4; if (type == R_MICROMIPS_LO16 || type == R_MICROMIPS_HI16) v -= 1; return v; } case R_MIPS_GOT_LOCAL_PAGE: // If relocation against MIPS local symbol requires GOT entry, this entry // should be initialized by 'page address'. This address is high 16-bits // of sum the symbol's value and the addend. return in.mipsGot->getVA() + in.mipsGot->getPageEntryOffset(file, sym, a) - in.mipsGot->getGp(file); case R_MIPS_GOT_OFF: case R_MIPS_GOT_OFF32: // In case of MIPS if a GOT relocation has non-zero addend this addend // should be applied to the GOT entry content not to the GOT entry offset. // That is why we use separate expression type. return in.mipsGot->getVA() + in.mipsGot->getSymEntryOffset(file, sym, a) - in.mipsGot->getGp(file); case R_MIPS_TLSGD: return in.mipsGot->getVA() + in.mipsGot->getGlobalDynOffset(file, sym) - in.mipsGot->getGp(file); case R_MIPS_TLSLD: return in.mipsGot->getVA() + in.mipsGot->getTlsIndexOffset(file) - in.mipsGot->getGp(file); case R_AARCH64_PAGE_PC: { uint64_t val = sym.isUndefWeak() ? p + a : sym.getVA(a); return getAArch64Page(val) - getAArch64Page(p); } case R_RISCV_PC_INDIRECT: { if (const Relocation *hiRel = getRISCVPCRelHi20(&sym, a)) return getRelocTargetVA(file, hiRel->type, hiRel->addend, sym.getVA(), *hiRel->sym, hiRel->expr); return 0; } case R_PC: case R_ARM_PCA: { uint64_t dest; if (expr == R_ARM_PCA) // Some PC relative ARM (Thumb) relocations align down the place. 
p = p & 0xfffffffc; if (sym.isUndefWeak()) { // On ARM and AArch64 a branch to an undefined weak resolves to the // next instruction, otherwise the place. if (config->emachine == EM_ARM) dest = getARMUndefinedRelativeWeakVA(type, a, p); else if (config->emachine == EM_AARCH64) dest = getAArch64UndefinedRelativeWeakVA(type, a, p); else if (config->emachine == EM_PPC) dest = p; else dest = sym.getVA(a); } else { dest = sym.getVA(a); } return dest - p; } case R_PLT: return sym.getPltVA() + a; case R_PLT_PC: case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; case R_PPC32_PLTREL: // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30 // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for // target VA computation. return sym.getPltVA() - p; case R_PPC64_CALL: { uint64_t symVA = sym.getVA(a); // If we have an undefined weak symbol, we might get here with a symbol // address of zero. That could overflow, but the code must be unreachable, // so don't bother doing anything at all. if (!symVA) return 0; // PPC64 V2 ABI describes two entry points to a function. The global entry // point is used for calls where the caller and callee (may) have different // TOC base pointers and r2 needs to be modified to hold the TOC base for // the callee. For local calls the caller and callee share the same // TOC base and so the TOC pointer initialization code should be skipped by // branching to the local entry point. return symVA - p + getPPC64GlobalEntryToLocalEntryOffset(sym.stOther); } case R_PPC64_TOCBASE: return getPPC64TocBase() + a; case R_RELAX_GOT_PC: return sym.getVA(a) - p; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_IE_TO_LE: case R_RELAX_TLS_LD_TO_LE: case R_TLS: // It is not very clear what to return if the symbol is undefined. With // --noinhibit-exec, even a non-weak undefined reference may reach here. // Just return A, which matches R_ABS, and the behavior of some dynamic // loaders. if (sym.isUndefined() || sym.isLazy()) return a; return getTlsTpOffset(sym) + a; case R_RELAX_TLS_GD_TO_LE_NEG: case R_NEG_TLS: if (sym.isUndefined()) return a; return -getTlsTpOffset(sym) + a; case R_SIZE: return sym.getSize() + a; case R_TLSDESC: return in.got->getGlobalDynAddr(sym) + a; case R_TLSDESC_PC: return in.got->getGlobalDynAddr(sym) + a - p; case R_AARCH64_TLSDESC_PAGE: return getAArch64Page(in.got->getGlobalDynAddr(sym) + a) - getAArch64Page(p); case R_TLSGD_GOT: return in.got->getGlobalDynOffset(sym) + a; case R_TLSGD_GOTPLT: return in.got->getVA() + in.got->getGlobalDynOffset(sym) + a - in.gotPlt->getVA(); case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: return in.got->getTlsIndexOff() + a; case R_TLSLD_PC: return in.got->getTlsIndexVA() + a - p; default: llvm_unreachable("invalid expression"); } } // This function applies relocations to sections without SHF_ALLOC bit. // Such sections are never mapped to memory at runtime. Debug sections are // an example. Relocations in non-alloc sections are much easier to // handle than in allocated sections because it will never need complex // treatment such as GOT or PLT (because at runtime no one refers them). // So, we handle relocations for non-alloc sections directly in this // function as a performance optimization. 
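// Illustrative sketch, not part of the original patch: the thread-pointer
// arithmetic in getTlsTpOffset() above, spelled out with hypothetical segment
// values (p_vaddr = 0x1004, p_memsz = 0x20, p_align = 16, a symbol 0x8 bytes
// into the TLS segment, 8-byte words).
static void exampleTlsTpOffsets() {
  const uint64_t pVaddr = 0x1004, pMemsz = 0x20, pAlign = 16;
  const uint64_t symOff = 0x8, wordsize = 8;
  // Variant 1 (AArch64): TP, then a two-word gap, then padding chosen so that
  // TP + gap + padding is congruent to p_vaddr modulo p_align.
  uint64_t v1 =
      symOff + wordsize * 2 + ((pVaddr - wordsize * 2) & (pAlign - 1));
  // Variant 2 (x86-64): the TLS block sits below TP, padded so that
  // TP - padding - p_memsz is congruent to p_vaddr modulo p_align.
  int64_t v2 = int64_t(symOff) - int64_t(pMemsz) -
               int64_t((-pVaddr - pMemsz) & (pAlign - 1));
  (void)v1; // 0x1c: the symbol lives at TP + 0x1c
  (void)v2; // -0x24: the symbol lives at TP - 0x24
}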
template void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; const bool isDebug = isDebugSection(*this); const bool isDebugLocOrRanges = isDebug && (name == ".debug_loc" || name == ".debug_ranges"); const bool isDebugLine = isDebug && name == ".debug_line"; Optional tombstone; for (const auto &patAndValue : llvm::reverse(config->deadRelocInNonAlloc)) if (patAndValue.first.match(this->name)) { tombstone = patAndValue.second; break; } for (const RelTy &rel : rels) { RelType type = rel.getType(config->isMips64EL); // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations // against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed // in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we // need to keep this bug-compatible code for a while. if (config->emachine == EM_386 && type == R_386_GOTPC) continue; uint64_t offset = getOffset(rel.r_offset); uint8_t *bufLoc = buf + offset; int64_t addend = getAddend(rel); if (!RelTy::IsRela) addend += target->getImplicitAddend(bufLoc, type); Symbol &sym = getFile()->getRelocTargetSym(rel); RelExpr expr = target->getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; if (expr == R_SIZE) { target->relocateNoSym(bufLoc, type, SignExtend64(sym.getSize() + addend)); continue; } if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { std::string msg = getLocation(offset) + ": has non-ABS relocation " + toString(type) + " against symbol '" + toString(sym) + "'"; if (expr != R_PC && expr != R_ARM_PCA) { error(msg); return; } // If the control reaches here, we found a PC-relative relocation in a // non-ALLOC section. Since non-ALLOC section is not loaded into memory // at runtime, the notion of PC-relative doesn't make sense here. So, // this is a usage error. However, GNU linkers historically accept such // relocations without any errors and relocate them as if they were at // address 0. For bug-compatibilty, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. warn(msg); target->relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend - offset))); continue; } if (tombstone || (isDebug && (type == target->symbolicRel || expr == R_DTPREL))) { // Resolve relocations in .debug_* referencing (discarded symbols or ICF // folded section symbols) to a tombstone value. Resolving to addend is // unsatisfactory because the result address range may collide with a // valid range of low address, or leave multiple CUs claiming ownership of // the same range of code, which may confuse consumers. // // To address the problems, we use -1 as a tombstone value for most // .debug_* sections. We have to ignore the addend because we don't want // to resolve an address attribute (which may have a non-zero addend) to // -1+addend (wrap around to a low address). // // R_DTPREL type relocations represent an offset into the dynamic thread // vector. The computed value is st_value plus a non-negative offset. // Negative values are invalid, so -1 can be used as the tombstone value. // // If the referenced symbol is discarded (made Undefined), or the // section defining the referenced symbol is garbage collected, // sym.getOutputSection() is nullptr. `ds->section->repl != ds->section` // catches the ICF folded case. However, resolving a relocation in // .debug_line to -1 would stop debugger users from setting breakpoints on // the folded-in function, so exclude .debug_line. 
// // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value - // (base address selection entry), so -2 is used. + // (base address selection entry), use 1 (which is used by GNU ld for + // .debug_ranges). + // + // TODO To reduce disruption, we use 0 instead of -1 as the tombstone + // value. Enable -1 in a future release. auto *ds = dyn_cast(&sym); if (!sym.getOutputSection() || (ds && ds->section->repl != ds->section && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. - const uint64_t value = - tombstone ? SignExtend64(*tombstone) - : (isDebugLocOrRanges ? UINT64_MAX - 1 : UINT64_MAX); + const uint64_t value = tombstone ? SignExtend64(*tombstone) + : (isDebugLocOrRanges ? 1 : 0); target->relocateNoSym(bufLoc, type, value); continue; } } target->relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); } } // This is used when '-r' is given. // For REL targets, InputSection::copyRelocations() may store artificial // relocations aimed to update addends. They are handled in relocateAlloc() // for allocatable sections, and this function does the same for // non-allocatable sections, such as sections with debug information. static void relocateNonAllocForRelocatable(InputSection *sec, uint8_t *buf) { const unsigned bits = config->is64 ? 64 : 32; for (const Relocation &rel : sec->relocations) { // InputSection::copyRelocations() adds only R_ABS relocations. assert(rel.expr == R_ABS); uint8_t *bufLoc = buf + rel.offset + sec->outSecOff; uint64_t targetVA = SignExtend64(rel.sym->getVA(rel.addend), bits); target->relocate(bufLoc, rel, targetVA); } } template void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { if (flags & SHF_EXECINSTR) adjustSplitStackFunctionPrologues(buf, bufEnd); if (flags & SHF_ALLOC) { relocateAlloc(buf, bufEnd); return; } auto *sec = cast(this); if (config->relocatable) relocateNonAllocForRelocatable(sec, buf); else if (sec->areRelocsRela) sec->relocateNonAlloc(buf, sec->template relas()); else sec->relocateNonAlloc(buf, sec->template rels()); } void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; for (const Relocation &rel : relocations) { if (rel.expr == R_NONE) continue; uint64_t offset = rel.offset; if (auto *sec = dyn_cast(this)) offset += sec->outSecOff; uint8_t *bufLoc = buf + offset; RelType type = rel.type; uint64_t addrLoc = getOutputSection()->addr + offset; RelExpr expr = rel.expr; uint64_t targetVA = SignExtend64( getRelocTargetVA(file, type, rel.addend, addrLoc, *rel.sym, expr), bits); switch (expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: target->relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_TOC: // rel.sym refers to the STT_SECTION symbol associated to the .toc input // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC // entry, there may be R_PPC64_TOC16_HA not paired with // R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation // opportunities but is safe. 
if (ppc64noTocRelax.count({rel.sym, rel.addend}) || !tryRelaxPPC64TocIndirection(rel, bufLoc)) target->relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: target->relaxTlsIeToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_LD_TO_LE_ABS: target->relaxTlsLdToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: target->relaxTlsGdToLe(bufLoc, rel, targetVA); break; case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOTPLT: target->relaxTlsGdToIe(bufLoc, rel, targetVA); break; case R_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS // sequence that has been relaxed and turned into a nop. In this // case, we don't want to handle it as a call. if (read32(bufLoc) == 0x60000000) // nop break; // Patch a nop (0x60000000) to a ld. if (rel.sym->needsTocRestore) { // gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for // recursive calls even if the function is preemptible. This is not // wrong in the common case where the function is not preempted at // runtime. Just ignore. if ((bufLoc + 8 > bufEnd || read32(bufLoc + 4) != 0x60000000) && rel.sym->file != file) { // Use substr(6) to remove the "__plt_" prefix. errorOrWarn(getErrorLocation(bufLoc) + "call to " + lld::toString(*rel.sym).substr(6) + " lacks nop, can't restore toc"); break; } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } target->relocate(bufLoc, rel, targetVA); break; default: target->relocate(bufLoc, rel, targetVA); break; } } // Apply jumpInstrMods. jumpInstrMods are created when the opcode of // a jmp insn must be modified to shrink the jmp insn or to flip the jmp // insn. This is primarily used to relax and optimize jumps created with // basic block sections. if (auto *sec = dyn_cast(this)) { for (const JumpInstrMod &jumpMod : jumpInstrMods) { uint64_t offset = jumpMod.offset + sec->outSecOff; uint8_t *bufLoc = buf + offset; target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); } } } // For each function-defining prologue, find any calls to __morestack, // and replace them with calls to __morestack_non_split. static void switchMorestackCallsToMorestackNonSplit( DenseSet &prologues, std::vector &morestackCalls) { // If the target adjusted a function's prologue, all calls to // __morestack inside that function should be switched to // __morestack_non_split. Symbol *moreStackNonSplit = symtab->find("__morestack_non_split"); if (!moreStackNonSplit) { error("Mixing split-stack objects requires a definition of " "__morestack_non_split"); return; } // Sort both collections to compare addresses efficiently. llvm::sort(morestackCalls, [](const Relocation *l, const Relocation *r) { return l->offset < r->offset; }); std::vector functions(prologues.begin(), prologues.end()); llvm::sort(functions, [](const Defined *l, const Defined *r) { return l->value < r->value; }); auto it = morestackCalls.begin(); for (Defined *f : functions) { // Find the first call to __morestack within the function. while (it != morestackCalls.end() && (*it)->offset < f->value) ++it; // Adjust all calls inside the function. 
while (it != morestackCalls.end() && (*it)->offset < f->value + f->size) { (*it)->sym = moreStackNonSplit; ++it; } } } static bool enclosingPrologueAttempted(uint64_t offset, const DenseSet &prologues) { for (Defined *f : prologues) if (f->value <= offset && offset < f->value + f->size) return true; return false; } // If a function compiled for split stack calls a function not // compiled for split stack, then the caller needs its prologue // adjusted to ensure that the called function will have enough stack // available. Find those functions, and adjust their prologues. template void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end) { if (!getFile()->splitStack) return; DenseSet prologues; std::vector morestackCalls; for (Relocation &rel : relocations) { // Local symbols can't possibly be cross-calls, and should have been // resolved long before this line. if (rel.sym->isLocal()) continue; // Ignore calls into the split-stack api. if (rel.sym->getName().startswith("__morestack")) { if (rel.sym->getName().equals("__morestack")) morestackCalls.push_back(&rel); continue; } // A relocation to non-function isn't relevant. Sometimes // __morestack is not marked as a function, so this check comes // after the name check. if (rel.sym->type != STT_FUNC) continue; // If the callee's-file was compiled with split stack, nothing to do. In // this context, a "Defined" symbol is one "defined by the binary currently // being produced". So an "undefined" symbol might be provided by a shared // library. It is not possible to tell how such symbols were compiled, so be // conservative. if (Defined *d = dyn_cast(rel.sym)) if (InputSection *isec = cast_or_null(d->section)) if (!isec || !isec->getFile() || isec->getFile()->splitStack) continue; if (enclosingPrologueAttempted(rel.offset, prologues)) continue; if (Defined *f = getEnclosingFunction(rel.offset)) { prologues.insert(f); if (target->adjustPrologueForCrossSplitStack(buf + getOffset(f->value), end, f->stOther)) continue; if (!getFile()->someNoSplitStack) error(lld::toString(this) + ": " + f->getName() + " (with -fsplit-stack) calls " + rel.sym->getName() + " (without -fsplit-stack), but couldn't adjust its prologue"); } } if (target->needsMoreStackNonSplit) switchMorestackCallsToMorestackNonSplit(prologues, morestackCalls); } template void InputSection::writeTo(uint8_t *buf) { if (type == SHT_NOBITS) return; if (auto *s = dyn_cast(this)) { s->writeTo(buf + outSecOff); return; } // If -r or --emit-relocs is given, then an InputSection // may be a relocation section. if (type == SHT_RELA) { copyRelocations(buf + outSecOff, getDataAs()); return; } if (type == SHT_REL) { copyRelocations(buf + outSecOff, getDataAs()); return; } // If -r is given, we may have a SHT_GROUP section. if (type == SHT_GROUP) { copyShtGroup(buf + outSecOff); return; } // If this is a compressed section, uncompress section contents directly // to the buffer. if (uncompressedSize >= 0) { size_t size = uncompressedSize; if (Error e = zlib::uncompress(toStringRef(rawData), (char *)(buf + outSecOff), size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); uint8_t *bufEnd = buf + outSecOff + size; relocate(buf, bufEnd); return; } // Copy section contents from source object file to output file // and then apply relocations. 
memcpy(buf + outSecOff, data().data(), data().size()); uint8_t *bufEnd = buf + outSecOff + data().size(); relocate(buf, bufEnd); } void InputSection::replace(InputSection *other) { alignment = std::max(alignment, other->alignment); // When a section is replaced with another section that was allocated to // another partition, the replacement section (and its associated sections) // need to be placed in the main partition so that both partitions will be // able to access it. if (partition != other->partition) { partition = 1; for (InputSection *isec : dependentSections) isec->partition = 1; } other->repl = repl; other->markDead(); } template EhInputSection::EhInputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name) : InputSectionBase(f, header, name, InputSectionBase::EHFrame) {} SyntheticSection *EhInputSection::getParent() const { return cast_or_null(parent); } // Returns the index of the first relocation that points to a region between // Begin and Begin+Size. template static unsigned getReloc(IntTy begin, IntTy size, const ArrayRef &rels, unsigned &relocI) { // Start search from RelocI for fast access. That works because the // relocations are sorted in .eh_frame. for (unsigned n = rels.size(); relocI < n; ++relocI) { const RelTy &rel = rels[relocI]; if (rel.r_offset < begin) continue; if (rel.r_offset < begin + size) return relocI; return -1; } return -1; } // .eh_frame is a sequence of CIE or FDE records. // This function splits an input section into records and returns them. template void EhInputSection::split() { if (areRelocsRela) split(relas()); else split(rels()); } template void EhInputSection::split(ArrayRef rels) { unsigned relI = 0; for (size_t off = 0, end = data().size(); off != end;) { size_t size = readEhRecordSize(this, off); pieces.emplace_back(off, this, size, getReloc(off, size, rels, relI)); // The empty record is the end marker. if (size == 4) break; off += size; } } static size_t findNull(StringRef s, size_t entSize) { // Optimize the common case. if (entSize == 1) return s.find(0); for (unsigned i = 0, n = s.size(); i != n; i += entSize) { const char *b = s.begin() + i; if (std::all_of(b, b + entSize, [](char c) { return c == 0; })) return i; } return StringRef::npos; } SyntheticSection *MergeInputSection::getParent() const { return cast_or_null(parent); } // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. void MergeInputSection::splitStrings(ArrayRef data, size_t entSize) { size_t off = 0; bool isAlloc = flags & SHF_ALLOC; StringRef s = toStringRef(data); while (!s.empty()) { size_t end = findNull(s, entSize); if (end == StringRef::npos) fatal(toString(this) + ": string is not null terminated"); size_t size = end + entSize; pieces.emplace_back(off, xxHash64(s.substr(0, size)), !isAlloc); s = s.substr(size); off += size; } } // Split non-SHF_STRINGS section. Such section is a sequence of // fixed size records. 
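// Illustrative sketch, not part of the original patch: how splitStrings()
// above carves a SHF_STRINGS section into pieces. For entSize == 2 (e.g.
// UTF-16 literals) findNull() returns the offset of the first entSize-aligned
// all-zero entry, and each piece covers [off, off + end + entSize). The
// hypothetical helper below collects only the (offset, size) pairs.
static SmallVector<std::pair<size_t, size_t>, 4>
exampleStringPieceOffsets(StringRef s, size_t entSize) {
  SmallVector<std::pair<size_t, size_t>, 4> offsets;
  size_t off = 0;
  while (!s.empty()) {
    size_t end = findNull(s, entSize);
    if (end == StringRef::npos)
      break; // the real code calls fatal("string is not null terminated")
    size_t size = end + entSize;
    offsets.push_back({off, size});
    s = s.substr(size);
    off += size;
  }
  return offsets;
}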
void MergeInputSection::splitNonStrings(ArrayRef data, size_t entSize) { size_t size = data.size(); assert((size % entSize) == 0); bool isAlloc = flags & SHF_ALLOC; for (size_t i = 0; i != size; i += entSize) pieces.emplace_back(i, xxHash64(data.slice(i, entSize)), !isAlloc); } template MergeInputSection::MergeInputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name) : InputSectionBase(f, header, name, InputSectionBase::Merge) {} MergeInputSection::MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize, ArrayRef data, StringRef name) : InputSectionBase(nullptr, flags, type, entsize, /*Link*/ 0, /*Info*/ 0, /*Alignment*/ entsize, data, name, SectionBase::Merge) {} // This function is called after we obtain a complete list of input sections // that need to be linked. This is responsible to split section contents // into small chunks for further processing. // // Note that this function is called from parallelForEach. This must be // thread-safe (i.e. no memory allocation from the pools). void MergeInputSection::splitIntoPieces() { assert(pieces.empty()); if (flags & SHF_STRINGS) splitStrings(data(), entsize); else splitNonStrings(data(), entsize); } SectionPiece *MergeInputSection::getSectionPiece(uint64_t offset) { if (this->data().size() <= offset) fatal(toString(this) + ": offset is outside the section"); // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to do a binary search of the original section piece vector. auto it = partition_point( pieces, [=](SectionPiece p) { return p.inputOff <= offset; }); return &it[-1]; } // Returns the offset in an output section for a given input offset. // Because contents of a mergeable section is not contiguous in output, // it is not just an addition to a base output offset. uint64_t MergeInputSection::getParentOffset(uint64_t offset) const { // If Offset is not at beginning of a section piece, it is not in the map. // In that case we need to search from the original section piece vector. 
const SectionPiece &piece = *(const_cast(this)->getSectionPiece (offset)); uint64_t addend = offset - piece.inputOff; return piece.outputOff + addend; } template InputSection::InputSection(ObjFile &, const ELF32LE::Shdr &, StringRef); template InputSection::InputSection(ObjFile &, const ELF32BE::Shdr &, StringRef); template InputSection::InputSection(ObjFile &, const ELF64LE::Shdr &, StringRef); template InputSection::InputSection(ObjFile &, const ELF64BE::Shdr &, StringRef); template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); template std::string InputSectionBase::getLocation(uint64_t); template void InputSection::writeTo(uint8_t *); template void InputSection::writeTo(uint8_t *); template void InputSection::writeTo(uint8_t *); template void InputSection::writeTo(uint8_t *); template MergeInputSection::MergeInputSection(ObjFile &, const ELF32LE::Shdr &, StringRef); template MergeInputSection::MergeInputSection(ObjFile &, const ELF32BE::Shdr &, StringRef); template MergeInputSection::MergeInputSection(ObjFile &, const ELF64LE::Shdr &, StringRef); template MergeInputSection::MergeInputSection(ObjFile &, const ELF64BE::Shdr &, StringRef); template EhInputSection::EhInputSection(ObjFile &, const ELF32LE::Shdr &, StringRef); template EhInputSection::EhInputSection(ObjFile &, const ELF32BE::Shdr &, StringRef); template EhInputSection::EhInputSection(ObjFile &, const ELF64LE::Shdr &, StringRef); template EhInputSection::EhInputSection(ObjFile &, const ELF64BE::Shdr &, StringRef); template void EhInputSection::split(); template void EhInputSection::split(); template void EhInputSection::split(); template void EhInputSection::split(); diff --git a/contrib/llvm-project/llvm/include/llvm/module.modulemap b/contrib/llvm-project/llvm/include/llvm/module.modulemap index b262311a96a0..778a17c8aeee 100644 --- a/contrib/llvm-project/llvm/include/llvm/module.modulemap +++ b/contrib/llvm-project/llvm/include/llvm/module.modulemap @@ -1,425 +1,426 @@ module LLVM_Analysis { requires cplusplus umbrella "Analysis" module * { export * } // This is intended for (repeated) textual inclusion. textual header "Analysis/TargetLibraryInfo.def" textual header "Analysis/VecFuncs.def" } module LLVM_AsmParser { requires cplusplus umbrella "AsmParser" module * { export * } } // A module covering CodeGen/ and Target/. These are intertwined // and codependent, and thus notionally form a single module. module LLVM_Backend { requires cplusplus module CodeGen { umbrella "CodeGen" module * { export * } // Exclude these; they're intended to be included into only a single // translation unit (or none) and aren't part of this module. exclude header "CodeGen/LinkAllAsmWriterComponents.h" exclude header "CodeGen/LinkAllCodegenComponents.h" // These are intended for (repeated) textual inclusion. textual header "CodeGen/DIEValue.def" } } // FIXME: Make this as a submodule of LLVM_Backend again. // Doing so causes a linker error in clang-format. 
module LLVM_Backend_Target { umbrella "Target" module * { export * } } module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } } module LLVM_Bitstream { requires cplusplus umbrella "Bitstream" module * { export * } } module LLVM_BinaryFormat { requires cplusplus umbrella "BinaryFormat" module * { export * } textual header "BinaryFormat/Dwarf.def" textual header "BinaryFormat/DynamicTags.def" textual header "BinaryFormat/MachO.def" textual header "BinaryFormat/MinidumpConstants.def" textual header "BinaryFormat/ELFRelocs/AArch64.def" textual header "BinaryFormat/ELFRelocs/AMDGPU.def" textual header "BinaryFormat/ELFRelocs/ARM.def" textual header "BinaryFormat/ELFRelocs/ARC.def" textual header "BinaryFormat/ELFRelocs/AVR.def" textual header "BinaryFormat/ELFRelocs/BPF.def" textual header "BinaryFormat/ELFRelocs/Hexagon.def" textual header "BinaryFormat/ELFRelocs/i386.def" textual header "BinaryFormat/ELFRelocs/Lanai.def" textual header "BinaryFormat/ELFRelocs/Mips.def" textual header "BinaryFormat/ELFRelocs/MSP430.def" textual header "BinaryFormat/ELFRelocs/PowerPC64.def" textual header "BinaryFormat/ELFRelocs/PowerPC.def" textual header "BinaryFormat/ELFRelocs/RISCV.def" textual header "BinaryFormat/ELFRelocs/Sparc.def" textual header "BinaryFormat/ELFRelocs/SystemZ.def" textual header "BinaryFormat/ELFRelocs/VE.def" textual header "BinaryFormat/ELFRelocs/x86_64.def" textual header "BinaryFormat/WasmRelocs.def" textual header "BinaryFormat/MsgPack.def" } module LLVM_Config { requires cplusplus umbrella "Config" extern module LLVM_Extern_Config_Def "module.extern.modulemap" module * { export * } } module LLVM_DebugInfo { requires cplusplus module DIContext { header "DebugInfo/DIContext.h" export * } } module LLVM_DebugInfo_DWARF { requires cplusplus umbrella "DebugInfo/DWARF" module * { export * } } module LLVM_DebugInfo_PDB { requires cplusplus umbrella "DebugInfo/PDB" module * { export * } // Separate out this subdirectory; it's an optional component that depends on // a separate library which might not be available. // // FIXME: There should be a better way to specify this. exclude header "DebugInfo/PDB/DIA/DIADataStream.h" exclude header "DebugInfo/PDB/DIA/DIAEnumDebugStreams.h" exclude header "DebugInfo/PDB/DIA/DIAEnumFrameData.h" exclude header "DebugInfo/PDB/DIA/DIAEnumInjectedSources.h" exclude header "DebugInfo/PDB/DIA/DIAEnumLineNumbers.h" exclude header "DebugInfo/PDB/DIA/DIAEnumSectionContribs.h" exclude header "DebugInfo/PDB/DIA/DIAEnumSourceFiles.h" exclude header "DebugInfo/PDB/DIA/DIAEnumSymbols.h" exclude header "DebugInfo/PDB/DIA/DIAEnumTables.h" exclude header "DebugInfo/PDB/DIA/DIAError.h" exclude header "DebugInfo/PDB/DIA/DIAFrameData.h" exclude header "DebugInfo/PDB/DIA/DIAInjectedSource.h" exclude header "DebugInfo/PDB/DIA/DIALineNumber.h" exclude header "DebugInfo/PDB/DIA/DIARawSymbol.h" exclude header "DebugInfo/PDB/DIA/DIASectionContrib.h" exclude header "DebugInfo/PDB/DIA/DIASession.h" exclude header "DebugInfo/PDB/DIA/DIASourceFile.h" exclude header "DebugInfo/PDB/DIA/DIASupport.h" exclude header "DebugInfo/PDB/DIA/DIATable.h" exclude header "DebugInfo/PDB/DIA/DIAUtils.h" } module LLVM_DebugInfo_PDB_DIA { requires cplusplus umbrella "DebugInfo/PDB/DIA" module * { export * } } module LLVM_DebugInfo_MSF { requires cplusplus umbrella "DebugInfo/MSF" module * { export * } } module LLVM_DebugInfo_CodeView { requires cplusplus umbrella "DebugInfo/CodeView" module * { export * } // These are intended for (repeated) textual inclusion. 
textual header "DebugInfo/CodeView/CodeViewRegisters.def" textual header "DebugInfo/CodeView/CodeViewTypes.def" textual header "DebugInfo/CodeView/CodeViewSymbols.def" } module LLVM_DWARFLinker { requires cplusplus umbrella "DWARFLinker" module * { export * } } module LLVM_ExecutionEngine { requires cplusplus umbrella "ExecutionEngine" module * { export * } // Exclude this; it's an optional component of the ExecutionEngine. exclude header "ExecutionEngine/OProfileWrapper.h" // Exclude these; they're intended to be included into only a single // translation unit (or none) and aren't part of this module. exclude header "ExecutionEngine/MCJIT.h" exclude header "ExecutionEngine/Interpreter.h" exclude header "ExecutionEngine/OrcMCJITReplacement.h" // FIXME: These exclude directives were added as a workaround for // and should be removed once it is fixed. exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h" exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h" exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h" // Exclude headers from LLVM_OrcSupport. exclude header "ExecutionEngine/Orc/OrcError.h" exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h" exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h" exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h" } // Orc utilities that don't depend only on Support (not ExecutionEngine or // IR). This is a workaround for ExecutionEngine's broken layering, and will // be removed in the future. module LLVM_OrcSupport { requires cplusplus header "ExecutionEngine/Orc/OrcError.h" header "ExecutionEngine/Orc/RPC/RPCUtils.h" header "ExecutionEngine/Orc/RPC/RPCSerialization.h" header "ExecutionEngine/Orc/RPC/RawByteChannel.h" export * } module LLVM_Pass { module Pass { // PassSupport.h and PassAnalysisSupport.h are made available only through // Pass.h. header "Pass.h" textual header "PassSupport.h" textual header "PassAnalysisSupport.h" export * } module PassRegistry { header "PassRegistry.h" export * } module InitializePasses { header "InitializePasses.h" export * } } module LLVM_intrinsic_gen { requires cplusplus // Delay building the modules containing dependencies to Attributes.h and // Intrinsics.h because they need to be generated by tablegen first. 
// Attributes.h module IR_Argument { header "IR/Argument.h" export * } module IR_Attributes { header "IR/Attributes.h" extern module LLVM_Extern_IR_Attributes_Gen "module.extern.modulemap" export * } module IR_AbstractCallSite { header "IR/AbstractCallSite.h" export * } module IR_ConstantFolder { header "IR/ConstantFolder.h" export * } module IR_GlobalVariable { header "IR/GlobalVariable.h" export * } module IR_NoFolder { header "IR/NoFolder.h" export * } module IRBuilderFolder { header "IR/IRBuilderFolder.h" export * } module IR_Module { header "IR/Module.h" export * } module IR_ModuleSummaryIndex { header "IR/ModuleSummaryIndex.h" export * } module IR_ModuleSummaryIndexYAML { header "IR/ModuleSummaryIndexYAML.h" export * } module IR_Function { header "IR/Function.h" export * } module IR_InstrTypes { header "IR/InstrTypes.h" export * } module IR_Instructions { header "IR/Instructions.h" export * } // Intrinsics.h module IR_CFG { header "IR/CFG.h" export * } module IR_ConstantRange { header "IR/ConstantRange.h" export * } module IR_Dominators { header "IR/Dominators.h" export * } module Analysis_PostDominators { header "Analysis/PostDominators.h" export * } module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * } module IR_IRBuilder { header "IR/IRBuilder.h" export * } module IR_IRPrintingPasses { header "IR/IRPrintingPasses.h" export * } module IR_MatrixBuilder { header "IR/MatrixBuilder.h" export * } module IR_PassManager { header "IR/PassManager.h" export * } module IR_PassManagerImpl { header "IR/PassManagerImpl.h" export * } module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * } module IR_Verifier { header "IR/Verifier.h" export * } module IR_InstIterator { header "IR/InstIterator.h" export * } module IR_InstVisitor { header "IR/InstVisitor.h" export * } module IR_Intrinsics { header "IR/Intrinsics.h" extern module LLVM_Extern_IR_Intricsics_Gen "module.extern.modulemap" extern module LLVM_Extern_IR_Intrinsics_Enum "module.extern.modulemap" export * } module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * } module IR_PatternMatch { header "IR/PatternMatch.h" export * } module IR_SafepointIRVerifier { header "IR/SafepointIRVerifier.h" export * } module IR_Statepoint { header "IR/Statepoint.h" export * } export * } module LLVM_IR { requires cplusplus umbrella "IR" module * { export * } // These are intended for (repeated) textual inclusion. 
textual header "IR/ConstrainedOps.def" textual header "IR/DebugInfoFlags.def" textual header "IR/Instruction.def" textual header "IR/Metadata.def" textual header "IR/FixedMetadataKinds.def" textual header "IR/Value.def" textual header "IR/VPIntrinsics.def" textual header "IR/RuntimeLibcalls.def" } module LLVM_IRReader { requires cplusplus umbrella "IRReader" module * { export * } } module LLVM_LineEditor { requires cplusplus umbrella "LineEditor" module * { export * } } module LLVM_LTO { requires cplusplus umbrella "LTO" module * { export * } } module LLVM_MC { requires cplusplus umbrella "MC" module * { export * } } // Used by llvm-tblgen module LLVM_MC_TableGen { requires cplusplus module MC_LaneBitmask { header "MC/LaneBitmask.h" export * } module MC_FixedLenDisassembler { header "MC/MCFixedLenDisassembler.h" export * } module MC_InstrItineraries { header "MC/MCInstrItineraries.h" export * } module MC_Schedule { header "MC/MCSchedule.h" export * } module MC_SubtargetFeature { header "MC/SubtargetFeature.h" export * } } module LLVM_Object { requires cplusplus umbrella "Object" module * { export * } } module LLVM_Option { requires cplusplus umbrella "Option" module * { export * } } module LLVM_ProfileData { requires cplusplus umbrella "ProfileData" module * { export * } textual header "ProfileData/InstrProfData.inc" } // FIXME: Mislayered? module LLVM_Support_TargetRegistry { requires cplusplus header "Support/TargetRegistry.h" export * } module LLVM_TableGen { requires cplusplus umbrella "TableGen" module * { export * } } module LLVM_Transforms { requires cplusplus umbrella "Transforms" module * { export * } } extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap" // A module covering ADT/ and Support/. These are intertwined and // codependent, and notionally form a single module. module LLVM_Utils { module ADT { requires cplusplus umbrella "ADT" module * { export * } } module Support { requires cplusplus umbrella "Support" module * { export * } - + // Exclude this; it should only be used on Windows. exclude header "Support/Windows/WindowsSupport.h" // Exclude these; they are fundamentally non-modular. exclude header "Support/PluginLoader.h" exclude header "Support/Solaris/sys/regset.h" // These are intended for textual inclusion. - textual header "Support/ARMTargetParser.def" textual header "Support/AArch64TargetParser.def" + textual header "Support/ARMTargetParser.def" + textual header "Support/RISCVTargetParser.def" textual header "Support/TargetOpcodes.def" textual header "Support/X86TargetParser.def" } // This part of the module is usable from both C and C++ code. module ConvertUTF { header "Support/ConvertUTF.h" export * } } // This is used for a $src == $build compilation. Otherwise we use // LLVM_Support_DataTypes_Build, defined in a module map that is // copied into the build area. module LLVM_Support_DataTypes_Src { header "llvm/Support/DataTypes.h" export * } module LLVM_WindowsManifest { requires cplusplus umbrella "WindowsManifest" module * { export * } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index ae282a7a1095..f409cd322146 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1,2335 +1,2341 @@ //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // The implementation for the loop memory dependence that was originally // developed for the loop vectorizer. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "loop-accesses" static cl::opt VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor)); unsigned VectorizerParams::VectorizationFactor; static cl::opt VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location( VectorizerParams::VectorizationInterleave)); unsigned VectorizerParams::VectorizationInterleave; static cl::opt RuntimeMemoryCheckThreshold( "runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); unsigned VectorizerParams::RuntimeMemoryCheckThreshold; /// The maximum iterations used to merge memory checks static cl::opt MemoryCheckMergeThreshold( "memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100)); /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; /// We collect dependences up to this threshold. 
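// Illustrative sketch, not part of the original patch: the option declarations
// above are cl::opt instances with external storage -- the parsed value is
// written through cl::location() into a plain global so that code outside this
// file (the VectorizerParams members) can read it. A minimal example of the
// pattern, with a hypothetical flag name and storage variable:
static unsigned ExampleThresholdStorage;
static cl::opt<unsigned, true> ExampleThreshold(
    "example-threshold", cl::Hidden,
    cl::desc("Hypothetical flag demonstrating cl::location external storage."),
    cl::location(ExampleThresholdStorage), cl::init(8));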
static cl::opt MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100)); /// This enables versioning on the strides of symbolically striding memory /// accesses in code like the following. /// for (i = 0; i < N; ++i) /// A[i * Stride1] += B[i * Stride2] ... /// /// Will be roughly translated to /// if (Stride1 == 1 && Stride2 == 1) { /// for (i = 0; i < N; i+=4) /// A[i:i+3] += ... /// } else /// ... static cl::opt EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning")); /// Enable store-to-load forwarding conflict detection. This option can /// be disabled for correctness testing. static cl::opt EnableForwardingConflictDetection( "store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true)); bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } Value *llvm::stripIntegerCast(Value *V) { if (auto *CI = dyn_cast(V)) if (CI->getOperand(0)->getType()->isIntegerTy()) return CI->getOperand(0); return V; } const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr) { const SCEV *OrigSCEV = PSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. ValueToValueMap::const_iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr); if (SI != PtrToStride.end()) { Value *StrideVal = SI->second; // Strip casts. StrideVal = stripIntegerCast(StrideVal); ScalarEvolution *SE = PSE.getSE(); const auto *U = cast(SE->getSCEV(StrideVal)); const auto *CT = static_cast(SE->getOne(StrideVal->getType())); PSE.addPredicate(*SE->getEqualPredicate(U, CT)); auto *Expr = PSE.getSCEV(Ptr); LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); return Expr; } // Otherwise, just return the SCEV of the original pointer. return OrigSCEV; } RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( unsigned Index, RuntimePointerChecking &RtCheck) : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start) { Members.push_back(Index); } /// Calculate Start and End points of memory access. /// Let's assume A is the first access and B is a memory access on N-th loop /// iteration. Then B is calculated as: /// B = A + Step*N . /// Step value may be positive or negative. /// N is a calculated back-edge taken count: /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 /// Start and End points are calculated in the following way: /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, /// where SizeOfElt is the size of single memory access in bytes. /// /// There is no conflict when the intervals are disjoint: /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE) { // Get the stride replaced scev. 
const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); ScalarEvolution *SE = PSE.getSE(); const SCEV *ScStart; const SCEV *ScEnd; if (SE->isLoopInvariant(Sc, Lp)) ScStart = ScEnd = Sc; else { const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); const SCEV *Ex = PSE.getBackedgeTakenCount(); ScStart = AR->getStart(); ScEnd = AR->evaluateAtIteration(Ex, *SE); const SCEV *Step = AR->getStepRecurrence(*SE); // For expressions with negative step, the upper bound is ScStart and the // lower bound is ScEnd. if (const auto *CStep = dyn_cast(Step)) { if (CStep->getValue()->isNegative()) std::swap(ScStart, ScEnd); } else { // Fallback case: the step is not constant, but we can still // get the upper and lower bounds of the interval by using min/max // expressions. ScStart = SE->getUMinExpr(ScStart, ScEnd); ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); } // Add the size of the pointed element to ScEnd. unsigned EltSize = Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8; const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); } SmallVector RuntimePointerChecking::generateChecks() const { SmallVector Checks; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; if (needsChecking(CGI, CGJ)) Checks.push_back(std::make_pair(&CGI, &CGJ)); } } return Checks; } void RuntimePointerChecking::generateChecks( MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { assert(Checks.empty() && "Checks is not empty"); groupChecks(DepCands, UseDependencies); Checks = generateChecks(); } bool RuntimePointerChecking::needsChecking( const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) if (needsChecking(M.Members[I], N.Members[J])) return true; return false; } /// Compare \p I and \p J and return the minimum. /// Return nullptr in case we couldn't find an answer. static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE) { const SCEV *Diff = SE->getMinusSCEV(J, I); const SCEVConstant *C = dyn_cast(Diff); if (!C) return nullptr; if (C->getValue()->isNegative()) return J; return I; } bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) { const SCEV *Start = RtCheck.Pointers[Index].Start; const SCEV *End = RtCheck.Pointers[Index].End; // Compare the starts and ends with the known minimum and maximum // of this set. We need to know how we compare against the min/max // of the set in order to be able to emit memchecks. const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); if (!Min0) return false; const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); if (!Min1) return false; // Update the low bound expression if we've found a new min value. if (Min0 == Start) Low = Start; // Update the high bound expression if we've found a new max value. 
if (Min1 != End) High = End; Members.push_back(Index); return true; } void RuntimePointerChecking::groupChecks( MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { // We build the groups from dependency candidates equivalence classes // because: // - We know that pointers in the same equivalence class share // the same underlying object and therefore there is a chance // that we can compare pointers // - We wouldn't be able to merge two pointers for which we need // to emit a memcheck. The classes in DepCands are already // conveniently built such that no two pointers in the same // class need checking against each other. // We use the following (greedy) algorithm to construct the groups // For every pointer in the equivalence class: // For each existing group: // - if the difference between this pointer and the min/max bounds // of the group is a constant, then make the pointer part of the // group and update the min/max bounds of that group as required. CheckingGroups.clear(); // If we need to check two pointers to the same underlying object // with a non-constant difference, we shouldn't perform any pointer // grouping with those pointers. This is because we can easily get // into cases where the resulting check would return false, even when // the accesses are safe. // // The following example shows this: // for (i = 0; i < 1000; ++i) // a[5000 + i * m] = a[i] + a[i + 9000] // // Here grouping gives a check of (5000, 5000 + 1000 * m) against // (0, 10000) which is always false. However, if m is 1, there is no // dependence. Not grouping the checks for a[i] and a[i + 9000] allows // us to perform an accurate check in this case. // // The above case requires that we have an UnknownDependence between // accesses to the same underlying object. This cannot happen unless // FoundNonConstantDistanceDependence is set, and therefore UseDependencies // is also false. In this case we will use the fallback path and create // separate checking groups for all pointers. // If we don't have the dependency partitions, construct a new // checking pointer group for each pointer. This is also required // for correctness, because in this case we can have checking between // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this)); return; } unsigned TotalComparisons = 0; DenseMap PositionMap; for (unsigned Index = 0; Index < Pointers.size(); ++Index) PositionMap[Pointers[Index].PointerValue] = Index; // We need to keep track of what pointers we've already seen so we // don't process them twice. SmallSet Seen; // Go through all equivalence classes, get the "pointer check groups" // and add them to the overall solution. We use the order in which accesses // appear in 'Pointers' to enforce determinism. for (unsigned I = 0; I < Pointers.size(); ++I) { // We've seen this pointer before, and therefore already processed // its equivalence class. if (Seen.count(I)) continue; MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, Pointers[I].IsWritePtr); SmallVector Groups; auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); // Because DepCands is constructed by visiting accesses in the order in // which they appear in alias sets (which is deterministic) and the // iteration order within an equivalence class member is only dependent on // the order in which unions and insertions are performed on the // equivalence class, the iteration order is deterministic. 
for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); MI != ME; ++MI) { - unsigned Pointer = PositionMap[MI->getPointer()]; + auto PointerI = PositionMap.find(MI->getPointer()); + assert(PointerI != PositionMap.end() && + "pointer in equivalence class not found in PositionMap"); + unsigned Pointer = PointerI->second; bool Merged = false; // Mark this pointer as seen. Seen.insert(Pointer); // Go through all the existing sets and see if we can find one // which can include this pointer. for (RuntimeCheckingPtrGroup &Group : Groups) { // Don't perform more than a certain amount of comparisons. // This should limit the cost of grouping the pointers to something // reasonable. If we do end up hitting this threshold, the algorithm // will create separate groups for all remaining pointers. if (TotalComparisons > MemoryCheckMergeThreshold) break; TotalComparisons++; if (Group.addPointer(Pointer)) { Merged = true; break; } } if (!Merged) // We couldn't add this pointer to any existing set or the threshold // for the number of comparisons has been reached. Create a new group // to hold the current pointer. Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); } // We've computed the grouped checks for this partition. // Save the results and continue with the next one. llvm::copy(Groups, std::back_inserter(CheckingGroups)); } } bool RuntimePointerChecking::arePointersInSamePartition( const SmallVectorImpl &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2) { return (PtrToPartition[PtrIdx1] != -1 && PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); } bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { const PointerInfo &PointerI = Pointers[I]; const PointerInfo &PointerJ = Pointers[J]; // No need to check if two readonly pointers intersect. if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; return true; } void RuntimePointerChecking::printChecks( raw_ostream &OS, const SmallVectorImpl &Checks, unsigned Depth) const { unsigned N = 0; for (const auto &Check : Checks) { const auto &First = Check.first->Members, &Second = Check.second->Members; OS.indent(Depth) << "Check " << N++ << ":\n"; OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; for (unsigned K = 0; K < First.size(); ++K) OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; for (unsigned K = 0; K < Second.size(); ++K) OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Run-time memory checks:\n"; printChecks(OS, Checks, Depth); OS.indent(Depth) << "Grouped accesses:\n"; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { const auto &CG = CheckingGroups[I]; OS.indent(Depth + 2) << "Group " << &CG << ":\n"; OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High << ")\n"; for (unsigned J = 0; J < CG.Members.size(); ++J) { OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr << "\n"; } } } namespace { /// Analyses memory accesses in a loop. /// /// Checks whether run time pointer checks are needed and builds sets for data /// dependence checking. 
class AccessAnalysis { public: /// Read or write access location. typedef PointerIntPair MemAccessInfo; typedef SmallVector MemAccessInfoList; AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), PSE(PSE) {} /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { Value *Ptr = const_cast(Loc.Ptr); AST.add(Ptr, LocationSize::unknown(), Loc.AATags); Accesses.insert(MemAccessInfo(Ptr, false)); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); } /// Register a store. void addStore(MemoryLocation &Loc) { Value *Ptr = const_cast(Loc.Ptr); AST.add(Ptr, LocationSize::unknown(), Loc.AATags); Accesses.insert(MemAccessInfo(Ptr, true)); } /// Check if we can emit a run-time no-alias check for \p Access. /// /// Returns true if we can emit a run-time no alias check for \p Access. /// If we can check this access, this also adds it to a dependence set and /// adds a run-time to check for it to \p RtCheck. If \p Assume is true, /// we will attempt to use additional run-time checks in order to get /// the bounds of the pointer. bool createCheckForAccess(RuntimePointerChecking &RtCheck, MemAccessInfo Access, const ValueToValueMap &Strides, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, unsigned ASId, bool ShouldCheckStride, bool Assume); /// Check whether we can check the pointers at runtime for /// non-intersection. /// /// Returns true if we need no check or if we do and we can generate them /// (i.e. the pointers have computable bounds). bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckWrap = false); /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. void buildDependenceSets() { processMemAccesses(); } /// Initial processing of memory accesses determined that we need to /// perform dependency checking. /// /// Note that this can later be cleared if we retry memcheck analysis without /// dependency checking (i.e. FoundNonConstantDistanceDependence). bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); DepChecker.clearDependences(); } MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } private: typedef SetVector PtrAccessSet; /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); /// Set of all accesses. PtrAccessSet Accesses; const DataLayout &DL; /// The loop being checked. const Loop *TheLoop; /// List of accesses that need a further dependence check. MemAccessInfoList CheckDeps; /// Set of pointers that are read only. SmallPtrSet ReadOnlyPtr; /// An alias set tracker to partition the access set by underlying object and //intrinsic property (such as TBAA metadata). AliasSetTracker AST; LoopInfo *LI; /// Sets of potentially dependent accesses - members of one set share an /// underlying pointer. The set "CheckDeps" identfies which sets really need a /// dependence check. MemoryDepChecker::DepCandidates &DepCands; /// Initial processing of memory accesses determined that we may need /// to add memchecks. 
Perform the analysis to determine the necessary checks. /// /// Note that, this is different from isDependencyCheckNeeded. When we retry /// memcheck analysis without dependency checking /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is /// cleared while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; /// The SCEV predicate containing all the SCEV-related assumptions. PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// Check whether a pointer can participate in a runtime bounds check. /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr /// by adding run-time checks (overflow checks) if necessary. static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume) { const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); // The bounds for loop-invariant pointer is trivial. if (PSE.getSE()->isLoopInvariant(PtrScev, L)) return true; const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR && Assume) AR = PSE.getAsAddRec(Ptr); if (!AR) return false; return AR->isAffine(); } /// Check whether a pointer address cannot wrap. static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L) { const SCEV *PtrScev = PSE.getSCEV(Ptr); if (PSE.getSE()->isLoopInvariant(PtrScev, L)) return true; int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) return true; return false; } bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, MemAccessInfo Access, const ValueToValueMap &StridesMap, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, unsigned ASId, bool ShouldCheckWrap, bool Assume) { Value *Ptr = Access.getPointer(); if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) return false; // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { auto *Expr = PSE.getSCEV(Ptr); if (!Assume || !isa(Expr)) return false; PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); } // The id of the dependence set. unsigned DepId; if (isDependencyCheckNeeded()) { Value *Leader = DepCands.getLeaderValue(Access).getPointer(); unsigned &LeaderId = DepSetId[Leader]; if (!LeaderId) LeaderId = RunningDepId++; DepId = LeaderId; } else // Each access has its own dependence set. DepId = RunningDepId++; bool IsWrite = Access.getInt(); RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); return true; } bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckWrap) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; bool MayNeedRTCheck = false; if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 0; for (auto &AS : AST) { int NumReadPtrChecks = 0; int NumWritePtrChecks = 0; bool CanDoAliasSetRT = true; ++ASId; // We assign consecutive id to access from different dependence sets. 
// Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; DenseMap DepSetId; SmallVector Retries; + // First, count how many write and read accesses are in the alias set. Also + // collect MemAccessInfos for later. + SmallVector AccessInfos; for (auto A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); - MemAccessInfo Access(Ptr, IsWrite); if (IsWrite) ++NumWritePtrChecks; else ++NumReadPtrChecks; + AccessInfos.emplace_back(Ptr, IsWrite); + } + // We do not need runtime checks for this alias set, if there are no writes + // or a single write and no reads. + if (NumWritePtrChecks == 0 || + (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) { + assert((AS.size() <= 1 || + all_of(AS, + [this](auto AC) { + MemAccessInfo AccessWrite(AC.getValue(), true); + return DepCands.findValue(AccessWrite) == DepCands.end(); + })) && + "Can only skip updating CanDoRT below, if all entries in AS " + "are reads or there is at most 1 entry"); + continue; + } + + for (auto &Access : AccessInfos) { if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { - LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" + << *Access.getPointer() << '\n'); Retries.push_back(Access); CanDoAliasSetRT = false; } } - // If we have at least two writes or one write and a read then we need to - // check them. But there is no need to checks if there is only one - // dependence set for this alias set. - // // Note that this function computes CanDoRT and MayNeedRTCheck // independently. For example CanDoRT=false, MayNeedRTCheck=false means that // we have a pointer for which we couldn't find the bounds but we don't // actually need to emit any checks so it does not matter. - bool NeedsAliasSetRTCheck = false; - if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) { - NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || - (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); - // For alias sets without at least 2 writes or 1 write and 1 read, there - // is no need to generate RT checks and CanDoAliasSetRT for this alias set - // does not impact whether runtime checks can be generated. - if (!NeedsAliasSetRTCheck) { - assert((AS.size() <= 1 || - all_of(AS, - [this](auto AC) { - MemAccessInfo AccessWrite(AC.getValue(), true); - return DepCands.findValue(AccessWrite) == - DepCands.end(); - })) && - "Can only skip updating CanDoRT below, if all entries in AS " - "are reads or there is at most 1 entry"); - continue; - } - } + // + // We need runtime checks for this alias set, if there are at least 2 + // dependence sets (in which case RunningDepId > 2) or if we need to re-try + // any bound checks (because in that case the number of dependence sets is + // incomplete). + bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) { // Reset the CanDoSetRt flag and retry all accesses that have failed. // We know that we need these checks, so we can now be more aggressive // and add further checks if required (overflow checks). 
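  // The retry loop below re-runs createCheckForAccess with Assume set, which
  // lets PredicatedScalarEvolution add run-time predicates (e.g. an
  // IncrementNUSW no-overflow assumption) so that bounds that were not
  // provably computable on the first pass can still be obtained, at the price
  // of emitting those extra predicate checks.
  //
  // For the "RunningDepId > 2" test above: RunningDepId starts at 1 and is
  // post-incremented for every dependence set that receives an id, so a
  // single dependence set leaves it at 2 and only two or more distinct sets
  // push it past 2.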
CanDoAliasSetRT = true; for (auto Access : Retries) if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, /*Assume=*/true)) { CanDoAliasSetRT = false; break; } } CanDoRT &= CanDoAliasSetRT; MayNeedRTCheck |= NeedsAliasSetRTCheck; ++ASId; } // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could // overlap. In the future there should be metadata for whether address spaces // are disjoint. unsigned NumPointers = RtCheck.Pointers.size(); for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. if (RtCheck.Pointers[i].DependencySetId == RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; Value *PtrI = RtCheck.Pointers[i].PointerValue; Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); if (ASi != ASj) { LLVM_DEBUG( dbgs() << "LAA: Runtime check would require comparison between" " different address spaces\n"); return false; } } } if (MayNeedRTCheck && CanDoRT) RtCheck.generateChecks(DepCands, IsDepCheckNeeded); LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n"); // If we can do run-time checks, but there are no checks, no runtime checks // are needed. This can happen when all pointers point to the same underlying // object for example. RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck; bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; if (!CanDoRTIfNeeded) RtCheck.reset(); return CanDoRTIfNeeded; } void AccessAnalysis::processMemAccesses() { // We process the set twice: first we process read-write pointers, last we // process read-only pointers. This allows us to skip dependence tests for // read-only pointers. LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); LLVM_DEBUG(dbgs() << " AST: "; AST.dump()); LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); LLVM_DEBUG({ for (auto A : Accesses) dbgs() << "\t" << *A.getPointer() << " (" << (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? "read-only" : "read")) << ")\n"; }); // The AliasSetTracker has nicely partitioned our pointers by metadata // compatibility and potential for underlying-object overlap. As a result, we // only need to check for potential pointer dependencies within each alias // set. for (auto &AS : AST) { // Note that both the alias-set tracker and the alias sets themselves used // linked lists internally and so the iteration order here is deterministic // (matching the original instruction order within each set). bool SetHasWrite = false; // Map of pointers to last access encountered. typedef DenseMap UnderlyingObjToAccessMap; UnderlyingObjToAccessMap ObjToLastAccess; // Set of access to check after all writes have been processed. PtrAccessSet DeferredAccesses; // Iterate over each alias set twice, once to process read/write pointers, // and then to process read-only pointers. for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { bool UseDeferred = SetIteration > 0; PtrAccessSet &S = UseDeferred ? 
DeferredAccesses : Accesses; for (auto AV : AS) { Value *Ptr = AV.getValue(); // For a single memory access in AliasSetTracker, Accesses may contain // both read and write, and they both need to be handled for CheckDeps. for (auto AC : S) { if (AC.getPointer() != Ptr) continue; bool IsWrite = AC.getInt(); // If we're using the deferred access set, then it contains only // reads. bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; if (UseDeferred && !IsReadOnlyPtr) continue; // Otherwise, the pointer must be in the PtrAccessSet, either as a // read or a write. assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || S.count(MemAccessInfo(Ptr, false))) && "Alias-set pointer not in the access set?"); MemAccessInfo Access(Ptr, IsWrite); DepCands.insert(Access); // Memorize read-only pointers for later processing and skip them in // the first round (they need to be checked after we have seen all // write pointers). Note: we also mark pointer that are not // consecutive as "read-only" pointers (so that we check // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". if (!UseDeferred && IsReadOnlyPtr) { DeferredAccesses.insert(Access); continue; } // If this is a write - check other reads and writes for conflicts. If // this is a read only check other writes for conflicts (but only if // there is no other write to the ptr - this is an optimization to // catch "a[i] = a[i] + " without having to do a dependence check). if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { CheckDeps.push_back(Access); IsRTCheckAnalysisNeeded = true; } if (IsWrite) SetHasWrite = true; // Create sets of pointers connected by a shared alias set and // underlying object. typedef SmallVector ValueVector; ValueVector TempObjects; GetUnderlyingObjects(Ptr, TempObjects, DL, LI); LLVM_DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (const Value *UnderlyingObj : TempObjects) { // nullptr never alias, don't join sets for pointer that have "null" // in their UnderlyingObjects list. if (isa(UnderlyingObj) && !NullPointerIsDefined( TheLoop->getHeader()->getParent(), UnderlyingObj->getType()->getPointerAddressSpace())) continue; UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(UnderlyingObj); if (Prev != ObjToLastAccess.end()) DepCands.unionSets(Access, Prev->second); ObjToLastAccess[UnderlyingObj] = Access; LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n"); } } } } } } static bool isInBoundsGep(Value *Ptr) { if (GetElementPtrInst *GEP = dyn_cast(Ptr)) return GEP->isInBounds(); return false; } /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, /// i.e. monotonically increasing/decreasing. static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L) { // FIXME: This should probably only return true for NUW. if (AR->getNoWrapFlags(SCEV::NoWrapMask)) return true; // Scalar evolution does not propagate the non-wrapping flags to values that // are derived from a non-wrapping induction variable because non-wrapping // could be flow-sensitive. // // Look through the potentially overflowing instruction to try to prove // non-wrapping for the *specific* value of Ptr. // The arithmetic implied by an inbounds GEP can't overflow. auto *GEP = dyn_cast(Ptr); if (!GEP || !GEP->isInBounds()) return false; // Make sure there is only one non-const index and analyze that. 
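  // E.g. for a GEP like &A[i + 4] in loop L, where "i + 4" carries the nsw
  // flag and the SCEV of i is an AddRec such as {0,+,1}<nsw><L>, the single
  // non-constant index is derived from a non-wrapping induction, so the
  // inbounds GEP is accepted as monotonic below even though SCEV did not
  // propagate a no-wrap flag to the pointer itself.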
Value *NonConstIndex = nullptr; for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end())) if (!isa(Index)) { if (NonConstIndex) return false; NonConstIndex = Index; } if (!NonConstIndex) // The recurrence is on the pointer, ignore for now. return false; // The index in GEP is signed. It is non-wrapping if it's derived from a NSW // AddRec using a NSW operation. if (auto *OBO = dyn_cast(NonConstIndex)) if (OBO->hasNoSignedWrap() && // Assume constant for other the operand so that the AddRec can be // easily found. isa(OBO->getOperand(1))) { auto *OpScev = PSE.getSCEV(OBO->getOperand(0)); if (auto *OpAR = dyn_cast(OpScev)) return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW); } return false; } /// Check whether the access through \p Ptr has a constant stride. int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, bool Assume, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. auto *PtrTy = cast(Ty); if (PtrTy->getElementType()->isAggregateType()) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr << "\n"); return 0; } const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (Assume && !AR) AR = PSE.getAsAddRec(Ptr); if (!AR) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // The access function must stride over the innermost loop. if (Lp != AR->getLoop()) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *AR << "\n"); return 0; } // The address calculation must not wrap. Otherwise, a dependence could be // inverted. // An inbounds getelementptr that is a AddRec with a unit stride // cannot wrap per definition. The unit stride requirement is checked later. // An getelementptr without an inbounds attribute and unit stride would have // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); bool IsNoWrapAddRec = !ShouldCheckWrap || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || isNoWrapAddRec(Ptr, AR, PSE, Lp); if (!IsNoWrapAddRec && !IsInBoundsGEP && NullPointerIsDefined(Lp->getHeader()->getParent(), PtrTy->getAddressSpace())) { if (Assume) { PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); IsNoWrapAddRec = true; LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" << "LAA: Pointer: " << *Ptr << "\n" << "LAA: SCEV: " << *AR << "\n" << "LAA: Added an overflow assumption\n"); } else { LLVM_DEBUG( dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " << *Ptr << " SCEV: " << *AR << "\n"); return 0; } } // Check the step is constant. const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast(Step); if (!C) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << " SCEV: " << *AR << "\n"); return 0; } auto &DL = Lp->getHeader()->getModule()->getDataLayout(); int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. if (APStepVal.getBitWidth() > 64) return 0; int64_t StepVal = APStepVal.getSExtValue(); // Strided access. 
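  // E.g. for i32 accesses (Size == 4): a constant step of +8 bytes gives
  // Stride == 2, a step of -4 bytes gives Stride == -1, and a step of 6 bytes
  // leaves a remainder, so it is rejected below as not having a constant
  // element stride (return 0).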
int64_t Stride = StepVal / Size; int64_t Rem = StepVal % Size; if (Rem) return 0; // If the SCEV could wrap but we have an inbounds gep with a unit stride we // know we can't "wrap around the address space". In case of address space // zero we know that this won't happen without triggering undefined behavior. if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 && (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(), PtrTy->getAddressSpace()))) { if (Assume) { // We can avoid this case by adding a run-time check. LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " << "inbounds or in address space 0 may wrap:\n" << "LAA: Pointer: " << *Ptr << "\n" << "LAA: SCEV: " << *AR << "\n" << "LAA: Added an overflow assumption\n"); PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); } else return 0; } return Stride; } bool llvm::sortPtrAccesses(ArrayRef VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl &SortedIndices) { assert(llvm::all_of( VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && "Expected list of pointer operands."); SmallVector, 4> OffValPairs; OffValPairs.reserve(VL.size()); // Walk over the pointers, and map each of them to an offset relative to // first pointer in the array. Value *Ptr0 = VL[0]; const SCEV *Scev0 = SE.getSCEV(Ptr0); Value *Obj0 = GetUnderlyingObject(Ptr0, DL); llvm::SmallSet Offsets; for (auto *Ptr : VL) { // TODO: Outline this code as a special, more time consuming, version of // computeConstantDifference() function. if (Ptr->getType()->getPointerAddressSpace() != Ptr0->getType()->getPointerAddressSpace()) return false; // If a pointer refers to a different underlying object, bail - the // pointers are by definition incomparable. Value *CurrObj = GetUnderlyingObject(Ptr, DL); if (CurrObj != Obj0) return false; const SCEV *Scev = SE.getSCEV(Ptr); const auto *Diff = dyn_cast(SE.getMinusSCEV(Scev, Scev0)); // The pointers may not have a constant offset from each other, or SCEV // may just not be smart enough to figure out they do. Regardless, // there's nothing we can do. if (!Diff) return false; // Check if the pointer with the same offset is found. int64_t Offset = Diff->getAPInt().getSExtValue(); if (!Offsets.insert(Offset).second) return false; OffValPairs.emplace_back(Offset, Ptr); } SortedIndices.clear(); SortedIndices.resize(VL.size()); std::iota(SortedIndices.begin(), SortedIndices.end(), 0); // Sort the memory accesses and keep the order of their uses in UseOrder. llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) { return OffValPairs[Left].first < OffValPairs[Right].first; }); // Check if the order is consecutive already. if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { return I == SortedIndices[I]; })) SortedIndices.clear(); return true; } /// Take the address space operand from the Load/Store instruction. /// Returns -1 if this is not a valid Load/Store instruction. static unsigned getAddressSpaceOperand(Value *I) { if (LoadInst *L = dyn_cast(I)) return L->getPointerAddressSpace(); if (StoreInst *S = dyn_cast(I)) return S->getPointerAddressSpace(); return -1; } /// Returns true if the memory operations \p A and \p B are consecutive. 
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType) { Value *PtrA = getLoadStorePointerOperand(A); Value *PtrB = getLoadStorePointerOperand(B); unsigned ASA = getAddressSpaceOperand(A); unsigned ASB = getAddressSpaceOperand(B); // Check that the address spaces match and that the pointers are valid. if (!PtrA || !PtrB || (ASA != ASB)) return false; // Make sure that A and B are different pointers. if (PtrA == PtrB) return false; // Make sure that A and B have the same type if required. if (CheckType && PtrA->getType() != PtrB->getType()) return false; unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); // Retrieve the address space again as pointer stripping now tracks through // `addrspacecast`. ASA = cast(PtrA->getType())->getAddressSpace(); ASB = cast(PtrB->getType())->getAddressSpace(); // Check that the address spaces match and that the pointers are valid. if (ASA != ASB) return false; IdxWidth = DL.getIndexSizeInBits(ASA); OffsetA = OffsetA.sextOrTrunc(IdxWidth); OffsetB = OffsetB.sextOrTrunc(IdxWidth); APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); const APInt &OffsetDelta = cast(OffsetDeltaSCEV)->getAPInt(); // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) return OffsetDelta == Size; // Compute the necessary base pointer delta to have the necessary final delta // equal to the size. // BaseDelta = Size - OffsetDelta; const SCEV *SizeSCEV = SE.getConstant(Size); const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV); // Otherwise compute the distance with SCEV between the base pointers. 
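  // E.g. two i32 loads from p+4 and p+8: stripping the in-bounds constant
  // offsets leaves the same base p with OffsetA == 4 and OffsetB == 8, so
  // OffsetDelta == 4 == Size and the accesses are consecutive without
  // consulting SCEV. The SCEV-based comparison below is only needed when the
  // stripped bases still differ.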
const SCEV *PtrSCEVA = SE.getSCEV(PtrA); const SCEV *PtrSCEVB = SE.getSCEV(PtrB); const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); return X == PtrSCEVB; } MemoryDepChecker::VectorizationSafetyStatus MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { switch (Type) { case NoDep: case Forward: case BackwardVectorizable: return VectorizationSafetyStatus::Safe; case Unknown: return VectorizationSafetyStatus::PossiblySafeWithRtChecks; case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return VectorizationSafetyStatus::Unsafe; } llvm_unreachable("unexpected DepType!"); } bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: case ForwardButPreventsForwarding: case Unknown: return false; case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: return true; } llvm_unreachable("unexpected DepType!"); } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { return isBackward() || Type == Unknown; } bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { case Forward: case ForwardButPreventsForwarding: return true; case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: return false; } llvm_unreachable("unexpected DepType!"); } bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize) { // If loads occur at a distance that is not a multiple of a feasible vector // factor store-load forwarding does not take place. // Positive dependences might cause troubles because vectorizing them might // prevent store-load forwarding making vectorized code run a lot slower. // a[i] = a[i-3] ^ a[i-8]; // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and // hence on your typical architecture store-load forwarding does not take // place. Vectorizing in such cases does not make sense. // Store-load forwarding distance. // After this many iterations store-to-load forwarding conflicts should not // cause any slowdowns. const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. uint64_t MaxVFWithoutSLForwardIssues = std::min( VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes); // Compute the smallest VF at which the store and load would be misaligned. for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; VF *= 2) { // If the number of vector iteration between the store and the load are // small we could incur conflicts. 
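  // E.g. a store/load distance of 12 bytes between i32 accesses
  // (TypeByteSize == 4, as in the "a[i] = a[i-3] ^ ..." example): the
  // threshold is 8 * 4 == 32 iterations, and the first candidate VF of 8
  // bytes already misaligns (12 % 8 != 0) while 12 / 8 < 32, so
  // MaxVFWithoutSLForwardIssues drops to 4 < 2 * TypeByteSize and the
  // distance is reported as a potential store-load forwarding conflict.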
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { MaxVFWithoutSLForwardIssues = (VF >>= 1); break; } } if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && MaxVFWithoutSLForwardIssues != VectorizerParams::MaxVectorWidth * TypeByteSize) MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; return false; } void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) { if (Status < S) Status = S; } /// Given a non-constant (unknown) dependence-distance \p Dist between two /// memory accesses, that have the same stride whose absolute value is given /// in \p Stride, and that have the same type size \p TypeByteSize, /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is /// possible to prove statically that the dependence distance is larger /// than the range that the accesses will travel through the execution of /// the loop. If so, return true; false otherwise. This is useful for /// example in loops such as the following (PR31098): /// for (i = 0; i < D; ++i) { /// = out[i]; /// out[i+D] = /// } static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize) { // If we can prove that // (**) |Dist| > BackedgeTakenCount * Step // where Step is the absolute stride of the memory accesses in bytes, // then there is no dependence. // // Rationale: // We basically want to check if the absolute distance (|Dist/Step|) // is >= the loop iteration count (or > BackedgeTakenCount). // This is equivalent to the Strong SIV Test (Practical Dependence Testing, // Section 4.2.1); Note, that for vectorization it is sufficient to prove // that the dependence distance is >= VF; This is checked elsewhere. // But in some cases we can prune unknown dependence distances early, and // even before selecting the VF, and without a runtime test, by comparing // the distance against the loop iteration count. Since the vectorized code // will be executed only if LoopCount >= VF, proving distance >= LoopCount // also guarantees that distance >= VF. // const uint64_t ByteStride = Stride * TypeByteSize; const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride); const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step); const SCEV *CastedDist = &Dist; const SCEV *CastedProduct = Product; uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); // The dependence distance can be positive/negative, so we sign extend Dist; // The multiplication of the absolute stride in bytes and the // backedgeTakenCount is non-negative, so we zero extend Product. if (DistTypeSize > ProductTypeSize) CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); else CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); // Is Dist - (BackedgeTakenCount * Step) > 0 ? // (If so, then we have proven (**) because |Dist| >= Dist) const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct); if (SE.isKnownPositive(Minus)) return true; // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ? 
// (If so, then we have proven (**) because |Dist| >= -1*Dist) const SCEV *NegDist = SE.getNegativeSCEV(CastedDist); Minus = SE.getMinusSCEV(NegDist, CastedProduct); if (SE.isKnownPositive(Minus)) return true; return false; } /// Check the dependence for two accesses with the same stride \p Stride. /// \p Distance is the positive distance and \p TypeByteSize is type size in /// bytes. /// /// \returns true if they are independent. static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize) { assert(Stride > 1 && "The stride must be greater than 1"); assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); assert(Distance > 0 && "The distance must be non-zero"); // Skip if the distance is not multiple of type byte size. if (Distance % TypeByteSize) return false; uint64_t ScaledDist = Distance / TypeByteSize; // No dependence if the scaled distance is not multiple of the stride. // E.g. // for (i = 0; i < 1024 ; i += 4) // A[i+2] = A[i] + 1; // // Two accesses in memory (scaled distance is 2, stride is 4): // | A[0] | | | | A[4] | | | | // | | | A[2] | | | | A[6] | | // // E.g. // for (i = 0; i < 1024 ; i += 3) // A[i+4] = A[i] + 1; // // Two accesses in memory (scaled distance is 4, stride is 3): // | A[0] | | | A[3] | | | A[6] | | | // | | | | | A[4] | | | A[7] | | return ScaledDist % Stride; } MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, const ValueToValueMap &Strides) { assert (AIdx < BIdx && "Must pass arguments in program order"); Value *APtr = A.getPointer(); Value *BPtr = B.getPointer(); bool AIsWrite = A.getInt(); bool BIsWrite = B.getInt(); // Two reads are independent. if (!AIsWrite && !BIsWrite) return Dependence::NoDep; // We cannot check pointers in different address spaces. if (APtr->getType()->getPointerAddressSpace() != BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); // If the induction step is negative we have to invert source and sink of the // dependence. if (StrideAPtr < 0) { std::swap(APtr, BPtr); std::swap(Src, Sink); std::swap(AIsWrite, BIsWrite); std::swap(AIdx, BIdx); std::swap(StrideAPtr, StrideBPtr); } const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink << "(Induction step: " << StrideAPtr << ")\n"); LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. 
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } Type *ATy = APtr->getType()->getPointerElementType(); Type *BTy = BPtr->getType()->getPointerElementType(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); uint64_t Stride = std::abs(StrideAPtr); const SCEVConstant *C = dyn_cast(Dist); if (!C) { if (TypeByteSize == DL.getTypeAllocSize(BTy) && isSafeDependenceDistance(DL, *(PSE.getSE()), *(PSE.getBackedgeTakenCount()), *Dist, Stride, TypeByteSize)) return Dependence::NoDep; LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); FoundNonConstantDistanceDependence = true; return Dependence::Unknown; } const APInt &Val = C->getAPInt(); int64_t Distance = Val.getSExtValue(); // Attempt to prove strided accesses independent. if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; } // Negative distances are not plausible dependencies. if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || ATy != BTy)) { LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; } LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } // Write to the same location with the same size. // Could be improved to assert type sizes are the same (i32 == float, etc). if (Val == 0) { if (ATy == BTy) return Dependence::Forward; LLVM_DEBUG( dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } assert(Val.isStrictlyPositive() && "Expect a positive value"); if (ATy != BTy) { LLVM_DEBUG( dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n"); return Dependence::Unknown; } // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? VectorizerParams::VectorizationInterleave : 1); // The minimum number of iterations for a vectorized/unrolled version. unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); // It's not vectorizable if the distance is smaller than the minimum distance // needed for a vectroized/unrolled version. Vectorizing one iteration in // front needs TypeByteSize * Stride. Vectorizing the last iteration needs // TypeByteSize (No need to plus the last gap distance). // // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. // foo(int *A) { // int *B = (int *)((char *)A + 14); // for (i = 0 ; i < 1024 ; i += 2) // B[i] = A[i] + 1; // } // // Two accesses in memory (stride is 2): // | A[0] | | A[2] | | A[4] | | A[6] | | // | B[0] | | B[2] | | B[4] | // // Distance needs for vectorizing iterations except the last iteration: // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. // // If MinNumIter is 2, it is vectorizable as the minimum distance needed is // 12, which is less than distance. 
// // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. uint64_t MinDistanceNeeded = TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(Distance)) { LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); return Dependence::Backward; } // Unsafe if the minimum distance needed is greater than max safe distance. if (MinDistanceNeeded > MaxSafeDepDistBytes) { LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } // Positive distance bigger than max vectorization factor. // FIXME: Should use max factor instead of max distance in bytes, which could // not handle different types. // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. // void foo (int *A, char *B) { // for (unsigned i = 0; i < 1024; i++) { // A[i+2] = A[i] + 1; // B[i+2] = B[i] + 1; // } // } // // This case is currently unsafe according to the max safe distance. If we // analyze the two accesses on array B, the max safe dependence distance // is 2. Then we analyze the accesses on array A, the minimum distance needed // is 8, which is less than 2 and forbidden vectorization, But actually // both A and B could be vectorized by 2 iterations. MaxSafeDepDistBytes = std::min(static_cast(Distance), MaxSafeDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits); return Dependence::BackwardVectorizable; } bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides) { MaxSafeDepDistBytes = -1; SmallPtrSet Visited; for (MemAccessInfo CurAccess : CheckDeps) { if (Visited.count(CurAccess)) continue; // Get the relevant memory access set. EquivalenceClasses::iterator I = AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); // Check accesses within this set. EquivalenceClasses::member_iterator AI = AccessSets.member_begin(I); EquivalenceClasses::member_iterator AE = AccessSets.member_end(); // Check every access pair. while (AI != AE) { Visited.insert(*AI); bool AIIsWrite = AI->getInt(); // Check loads only against next equivalent class, but stores also against // other stores in the same equivalence class - to the same address. EquivalenceClasses::member_iterator OI = (AIIsWrite ? AI : std::next(AI)); while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector::iterator I1 = Accesses[*AI].begin(), I1E = Accesses[*AI].end(); I1 != I1E; ++I1) // Scan all accesses of another equivalence class, but only the next // accesses of the same equivalent class. for (std::vector::iterator I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); I2 != I2E; ++I2) { auto A = std::make_pair(&*AI, *I1); auto B = std::make_pair(&*OI, *I2); assert(*I1 != *I2); if (*I1 > *I2) std::swap(A, B); Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, B.second, Strides); mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. if (RecordDependences) { if (Type != Dependence::NoDep) Dependences.push_back(Dependence(A.second, B.second, Type)); if (Dependences.size() >= MaxDependences) { RecordDependences = false; Dependences.clear(); LLVM_DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } if (!RecordDependences && !isSafeForVectorization()) return false; } ++OI; } AI++; } } LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return isSafeForVectorization(); } SmallVector MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const { MemAccessInfo Access(Ptr, isWrite); auto &IndexVector = Accesses.find(Access)->second; SmallVector Insts; transform(IndexVector, std::back_inserter(Insts), [&](unsigned Idx) { return this->InstMap[Idx]; }); return Insts; } const char *MemoryDepChecker::Dependence::DepName[] = { "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward", "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"}; void MemoryDepChecker::Dependence::print( raw_ostream &OS, unsigned Depth, const SmallVectorImpl &Instrs) const { OS.indent(Depth) << DepName[Type] << ":\n"; OS.indent(Depth + 2) << *Instrs[Source] << " -> \n"; OS.indent(Depth + 2) << *Instrs[Destination] << "\n"; } bool LoopAccessInfo::canAnalyzeLoop() { // We need to have a loop header. LLVM_DEBUG(dbgs() << "LAA: Found a loop in " << TheLoop->getHeader()->getParent()->getName() << ": " << TheLoop->getHeader()->getName() << '\n'); // We can only analyze innermost loops. if (!TheLoop->empty()) { LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { LLVM_DEBUG( dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { LLVM_DEBUG( dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is // checked at the end of each iteration. With that we can assume that all // instructions in the loop are executed the same number of times. if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { LLVM_DEBUG( dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; } // ScalarEvolution needs to be able to find the exit count. 
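  // E.g. a plain bottom-tested loop such as "for (i = 0; i < n; i++) a[i] = 0;"
  // passes the structural checks above (innermost, one backedge, the latch is
  // the only exiting block) and has a backedge-taken count SCEV can compute
  // (n - 1 under the guard n > 0). Adding an early "break" on a
  // data-dependent condition introduces a second exiting block and the loop
  // is rejected before any memory analysis runs.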
const SCEV *ExitCount = PSE->getBackedgeTakenCount(); if (ExitCount == PSE->getSE()->getCouldNotCompute()) { recordAnalysis("CantComputeNumberOfIterations") << "could not determine number of loop iterations"; LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } return true; } void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { typedef SmallPtrSet ValueSet; // Holds the Load and Store instructions. SmallVector Loads; SmallVector Stores; // Holds all the different accesses in the loop. unsigned NumReads = 0; unsigned NumReadWrites = 0; bool HasComplexMemInst = false; // A runtime check is only legal to insert if there are no convergent calls. HasConvergentOp = false; PtrRtChecking->Pointers.clear(); PtrRtChecking->Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); const bool EnableMemAccessVersioningOfLoop = EnableMemAccessVersioning && !TheLoop->getHeader()->getParent()->hasOptSize(); // For each block. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the BB and collect legal loads and stores. Also detect any // convergent instructions. for (Instruction &I : *BB) { if (auto *Call = dyn_cast(&I)) { if (Call->isConvergent()) HasConvergentOp = true; } // With both a non-vectorizable memory instruction and a convergent // operation, found in this loop, no reason to continue the search. if (HasComplexMemInst && HasConvergentOp) { CanVecMem = false; return; } // Avoid hitting recordAnalysis multiple times. if (HasComplexMemInst) continue; // If this is a load, save it. If this instruction can read from memory // but is not a load, then we quit. Notice that we don't handle function // calls that read or write. if (I.mayReadFromMemory()) { // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. auto *Call = dyn_cast(&I); if (Call && getVectorIntrinsicIDForCall(Call, TLI)) continue; // If the function has an explicit vectorized counterpart, we can safely // assume that it can be vectorized. if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && !VFDatabase::getMappings(*Call).empty()) continue; auto *Ld = dyn_cast(&I); if (!Ld) { recordAnalysis("CantVectorizeInstruction", Ld) << "instruction cannot be vectorized"; HasComplexMemInst = true; continue; } if (!Ld->isSimple() && !IsAnnotatedParallel) { recordAnalysis("NonSimpleLoad", Ld) << "read with atomic ordering or volatile read"; LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); HasComplexMemInst = true; continue; } NumLoads++; Loads.push_back(Ld); DepChecker->addAccess(Ld); if (EnableMemAccessVersioningOfLoop) collectStridedAccess(Ld); continue; } // Save 'store' instructions. Abort if other instructions write to memory. if (I.mayWriteToMemory()) { auto *St = dyn_cast(&I); if (!St) { recordAnalysis("CantVectorizeInstruction", St) << "instruction cannot be vectorized"; HasComplexMemInst = true; continue; } if (!St->isSimple() && !IsAnnotatedParallel) { recordAnalysis("NonSimpleStore", St) << "write with atomic ordering or volatile write"; LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); HasComplexMemInst = true; continue; } NumStores++; Stores.push_back(St); DepChecker->addAccess(St); if (EnableMemAccessVersioningOfLoop) collectStridedAccess(St); } } // Next instr. } // Next block. 
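  // E.g. a call to a math routine such as sinf is tolerated by the scan above
  // when TLI maps it to a vector intrinsic or a vectorized counterpart,
  // whereas an atomic or volatile access (a non-simple load/store) marks
  // HasComplexMemInst, unless the loop is annotated parallel, and the
  // analysis gives up below.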
if (HasComplexMemInst) { CanVecMem = false; return; } // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. // Check if we see any stores. If there are no stores, then we don't // care if the pointers are *restrict*. if (!Stores.size()) { LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); CanVecMem = true; return; } MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), TheLoop, AA, LI, DependentAccesses, *PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once // for read and once for write, it will only appear once (on the write // list). This is okay, since we are going to check for conflicts between // writes and between reads and writes, but not between reads and reads. ValueSet Seen; // Record uniform store addresses to identify if we have multiple stores // to the same address. ValueSet UniformStores; for (StoreInst *ST : Stores) { Value *Ptr = ST->getPointerOperand(); if (isUniform(Ptr)) HasDependenceInvolvingLoopInvariantAddress |= !UniformStores.insert(Ptr).second; // If we did *not* see this pointer before, insert it to the read-write // list. At this phase it is only a 'write' list. if (Seen.insert(Ptr).second) { ++NumReadWrites; MemoryLocation Loc = MemoryLocation::get(ST); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. if (blockNeedsPredication(ST->getParent(), TheLoop, DT)) Loc.AATags.TBAA = nullptr; Accesses.addStore(Loc); } } if (IsAnnotatedParallel) { LLVM_DEBUG( dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"); CanVecMem = true; return; } for (LoadInst *LD : Loads) { Value *Ptr = LD->getPointerOperand(); // If we did *not* see this pointer before, insert it to the // read list. If we *did* see it before, then it is already in // the read-write list. This allows us to vectorize expressions // such as A[i] += x; Because the address of A[i] is a read-write // pointer. This only works if the index of A[i] is consecutive. // If the address of i is unknown (for example A[B[i]]) then we may // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; if (Seen.insert(Ptr).second || !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) { ++NumReads; IsReadOnlyPtr = true; } // See if there is an unsafe dependency between a load to a uniform address and // store to the same uniform address. if (UniformStores.count(Ptr)) { LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform " "load and uniform store to the same address!\n"); HasDependenceInvolvingLoopInvariantAddress = true; } MemoryLocation Loc = MemoryLocation::get(LD); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. if (blockNeedsPredication(LD->getParent(), TheLoop, DT)) Loc.AATags.TBAA = nullptr; Accesses.addLoad(Loc, IsReadOnlyPtr); } // If we write (or read-write) to a single destination and there are no // other reads in this loop then is it safe to vectorize. 
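  // E.g. "for (i = 0; i < n; i++) a[i] = 0;" contributes a single write-only
  // pointer and no reads, so the check below declares the loop safe without
  // building dependence sets or runtime checks, while "a[i] = b[i]" also
  // registers a read pointer and falls through to the full analysis.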
if (NumReadWrites == 1 && NumReads == 0) { LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); CanVecMem = true; return; } // Build dependence sets and check whether we need a runtime pointer bounds // check. Accesses.buildDependenceSets(); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides); if (!CanDoRTIfNeeded) { recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); CanVecMem = false; return; } LLVM_DEBUG( dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); CanVecMem = DepChecker->areDepsSafe( DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. Accesses.resetDepChecks(*DepChecker); PtrRtChecking->reset(); PtrRtChecking->Need = true; auto *SE = PSE->getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop, SymbolicStrides, true); // Check that we found the bounds for the pointer. if (!CanDoRTIfNeeded) { recordAnalysis("CantCheckMemDepsAtRunTime") << "cannot check memory dependencies at runtime"; LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); CanVecMem = false; return; } CanVecMem = true; } } if (HasConvergentOp) { recordAnalysis("CantInsertRuntimeCheckWithConvergent") << "cannot add control dependency to convergent operation"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"); CanVecMem = false; return; } if (CanVecMem) LLVM_DEBUG( dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); else { recordAnalysis("UnsafeMemDep") << "unsafe dependent memory operations in loop. Use " "#pragma loop distribute(enable) to allow loop distribution " "to attempt to isolate the offending operations into a separate " "loop"; LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); } } bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT) { assert(TheLoop->contains(BB) && "Unknown block used"); // Blocks that do not dominate the latch need predication. BasicBlock* Latch = TheLoop->getLoopLatch(); return !DT->dominates(BB, Latch); } OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, Instruction *I) { assert(!Report && "Multiple reports generated"); Value *CodeRegion = TheLoop->getHeader(); DebugLoc DL = TheLoop->getStartLoc(); if (I) { CodeRegion = I->getParent(); // If there is no debug location attached to the instruction, revert back to // using the loop's. if (I->getDebugLoc()) DL = I->getDebugLoc(); } Report = std::make_unique(DEBUG_TYPE, RemarkName, DL, CodeRegion); return *Report; } bool LoopAccessInfo::isUniform(Value *V) const { auto *SE = PSE->getSE(); // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is // never considered uniform. // TODO: Is this really what we want? 
Even without FP SCEV, we may want some // trivially loop-invariant FP values to be considered uniform. if (!SE->isSCEVable(V->getType())) return false; return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); } void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { Value *Ptr = nullptr; if (LoadInst *LI = dyn_cast(MemAccess)) Ptr = LI->getPointerOperand(); else if (StoreInst *SI = dyn_cast(MemAccess)) Ptr = SI->getPointerOperand(); else return; Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); if (!Stride) return; LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " "versioning:"); LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); // Avoid adding the "Stride == 1" predicate when we know that // Stride >= Trip-Count. Such a predicate will effectively optimize a single // or zero iteration loop, as Trip-Count <= Stride == 1. // // TODO: We are currently not making a very informed decision on when it is // beneficial to apply stride versioning. It might make more sense that the // users of this analysis (such as the vectorizer) will trigger it, based on // their specific cost considerations; For example, in cases where stride // versioning does not help resolving memory accesses/dependences, the // vectorizer should evaluate the cost of the runtime test, and the benefit // of various possible stride specializations, considering the alternatives // of using gather/scatters (if available). const SCEV *StrideExpr = PSE->getSCEV(Stride); const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); // Match the types so we can compare the stride and the BETakenCount. // The Stride can be positive/negative, so we sign extend Stride; // The backedgeTakenCount is non-negative, so we zero extend BETakenCount. 
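// Illustrative sketch, not part of this patch: the arithmetic behind the
// "Stride >= Trip-Count" test performed below. Since TripCount is
// BackedgeTakenCount + 1, "Stride >= TripCount" holds exactly when
// Stride - BackedgeTakenCount > 0, which is the question put to
// isKnownPositive() once both operands have been widened to a common type
// (sign-extending the possibly negative stride, zero-extending the
// non-negative backedge-taken count). The helper below is hypothetical and
// assumes both quantities fit in a signed 64-bit value after widening.
#include <cstdint>

static bool strideCoversWholeLoop(std::int64_t Stride,
                                  std::int64_t BackedgeTakenCount) {
  std::int64_t StrideMinusBTC = Stride - BackedgeTakenCount;
  // Equivalent to Stride >= BackedgeTakenCount + 1 == TripCount.
  return StrideMinusBTC > 0;
}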
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); const SCEV *CastedStride = StrideExpr; const SCEV *CastedBECount = BETakenCount; ScalarEvolution *SE = PSE->getSE(); if (BETypeSize >= StrideTypeSize) CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); else CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); // Since TripCount == BackEdgeTakenCount + 1, checking: // "Stride >= TripCount" is equivalent to checking: // Stride - BETakenCount > 0 if (SE->isKnownPositive(StrideMinusBETaken)) { LLVM_DEBUG( dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " "Stride==1 predicate will imply that the loop executes " "at most once.\n"); return; } LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version."); SymbolicStrides[Ptr] = Stride; StrideSet.insert(Stride); } LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(std::make_unique(SE)), DepChecker(std::make_unique(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(AA, LI, TLI, DT); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { OS.indent(Depth) << "Memory dependences are safe"; if (MaxSafeDepDistBytes != -1ULL) OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes << " bytes"; if (PtrRtChecking->Need) OS << " with run-time checks"; OS << "\n"; } if (HasConvergentOp) OS.indent(Depth) << "Has convergent operation in loop\n"; if (Report) OS.indent(Depth) << "Report: " << Report->getMsg() << "\n"; if (auto *Dependences = DepChecker->getDependences()) { OS.indent(Depth) << "Dependences:\n"; for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); OS << "\n"; } } else OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. PtrRtChecking->print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Non vectorizable stores to invariant address were " << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; PSE->getUnionPredicate().print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Expressions re-written:\n"; PSE->print(OS, Depth); } LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) { initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry()); } const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) LAI = std::make_unique(L, SE, TLI, AA, DT, LI); return *LAI.get(); } void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { LoopAccessLegacyAnalysis &LAA = *const_cast(this); for (Loop *TopLevelLoop : *LI) for (Loop *L : depth_first(TopLevelLoop)) { OS.indent(2) << L->getHeader()->getName() << ":\n"; auto &LAI = LAA.getInfo(L); LAI.print(OS, 4); } } bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); return false; } void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } char LoopAccessLegacyAnalysis::ID = 0; static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) AnalysisKey LoopAccessAnalysis::Key; LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR) { return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI); } namespace llvm { Pass *createLAAPass() { return new LoopAccessLegacyAnalysis(); } } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index a5030305435c..c61531c5141a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1,1838 +1,1839 @@ //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains a pass (at IR level) to replace atomic instructions with // __atomic_* library calls, or target specific instruction which implement the // same semantics in a way which better fits the target backend. This can // include the use of (intrinsic-based) load-linked/store-conditional loops, // AtomicCmpXchg, or type coercions. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include #include using namespace llvm; #define DEBUG_TYPE "atomic-expand" namespace { class AtomicExpand: public FunctionPass { const TargetLowering *TLI = nullptr; public: static char ID; // Pass identification, replacement for typeid AtomicExpand() : FunctionPass(ID) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); bool tryExpandAtomicLoad(LoadInst *LI); bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); Value * insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp); void expandAtomicOpToLLSC( Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp); void expandPartwordAtomicRMW( AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); static Value *insertRMWCmpXchgLoop( IRBuilder<> &Builder, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg); bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool isIdempotentRMW(AtomicRMWInst *RMWI); bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, AtomicOrdering Ordering2, ArrayRef Libcalls); void expandAtomicLoadToLibcall(LoadInst *LI); void expandAtomicStoreToLibcall(StoreInst *LI); void expandAtomicRMWToLibcall(AtomicRMWInst *I); void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); friend bool 
llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); }; } // end anonymous namespace char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } // Helper functions to retrieve the size of atomic instructions. static unsigned getAtomicOpSize(LoadInst *LI) { const DataLayout &DL = LI->getModule()->getDataLayout(); return DL.getTypeStoreSize(LI->getType()); } static unsigned getAtomicOpSize(StoreInst *SI) { const DataLayout &DL = SI->getModule()->getDataLayout(); return DL.getTypeStoreSize(SI->getValueOperand()->getType()); } static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { const DataLayout &DL = RMWI->getModule()->getDataLayout(); return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); } static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { const DataLayout &DL = CASI->getModule()->getDataLayout(); return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); } // Determine if a particular atomic operation has a supported size, // and is of appropriate alignment, to be passed through for target // lowering. (Versus turning into a __atomic libcall) template static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { unsigned Size = getAtomicOpSize(I); Align Alignment = I->getAlign(); return Alignment >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; } bool AtomicExpand::runOnFunction(Function &F) { auto *TPC = getAnalysisIfAvailable(); if (!TPC) return false; auto &TM = TPC->getTM(); if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) return false; TLI = TM.getSubtargetImpl(F)->getTargetLowering(); SmallVector AtomicInsts; // Changing control-flow while iterating through it is a bad idea, so gather a // list of all atomic instructions before we start. for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { Instruction *I = &*II; if (I->isAtomic() && !isa(I)) AtomicInsts.push_back(I); } bool MadeChange = false; for (auto I : AtomicInsts) { auto LI = dyn_cast(I); auto SI = dyn_cast(I); auto RMWI = dyn_cast(I); auto CASI = dyn_cast(I); assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction"); // If the Size/Alignment is not supported, replace with a libcall. 
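// Illustrative sketch, not part of this patch: the shape of the
// size/alignment gate applied below. An atomic operation is only handed to
// target lowering when it is at least naturally aligned for its width and no
// wider than the target's supported atomic width; everything else is turned
// into an __atomic_* libcall. 'MaxAtomicSizeInBits' stands in for the value
// returned by TargetLowering::getMaxAtomicSizeInBitsSupported().
static bool sizeSupportedForLowering(unsigned SizeInBytes,
                                     unsigned AlignInBytes,
                                     unsigned MaxAtomicSizeInBits) {
  return AlignInBytes >= SizeInBytes &&
         SizeInBytes <= MaxAtomicSizeInBits / 8;
}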
if (LI) { if (!atomicSizeSupported(TLI, LI)) { expandAtomicLoadToLibcall(LI); MadeChange = true; continue; } } else if (SI) { if (!atomicSizeSupported(TLI, SI)) { expandAtomicStoreToLibcall(SI); MadeChange = true; continue; } } else if (RMWI) { if (!atomicSizeSupported(TLI, RMWI)) { expandAtomicRMWToLibcall(RMWI); MadeChange = true; continue; } } else if (CASI) { if (!atomicSizeSupported(TLI, CASI)) { expandAtomicCASToLibcall(CASI); MadeChange = true; continue; } } if (TLI->shouldInsertFencesForAtomic(I)) { auto FenceOrdering = AtomicOrdering::Monotonic; if (LI && isAcquireOrStronger(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(AtomicOrdering::Monotonic); } else if (SI && isReleaseOrStronger(SI->getOrdering())) { FenceOrdering = SI->getOrdering(); SI->setOrdering(AtomicOrdering::Monotonic); } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || isAcquireOrStronger(RMWI->getOrdering()))) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) == TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by // expandAtomicCmpXchg in that case. FenceOrdering = CASI->getSuccessOrdering(); CASI->setSuccessOrdering(AtomicOrdering::Monotonic); CASI->setFailureOrdering(AtomicOrdering::Monotonic); } if (FenceOrdering != AtomicOrdering::Monotonic) { MadeChange |= bracketInstWithFences(I, FenceOrdering); } } if (LI) { if (LI->getType()->isFloatingPointTy()) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. LI = convertAtomicLoadToIntegerType(LI); assert(LI->getType()->isIntegerTy() && "invariant broken"); MadeChange = true; } MadeChange |= tryExpandAtomicLoad(LI); } else if (SI) { if (SI->getValueOperand()->getType()->isFloatingPointTy()) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. SI = convertAtomicStoreToIntegerType(SI); assert(SI->getValueOperand()->getType()->isIntegerTy() && "invariant broken"); MadeChange = true; } if (TLI->shouldExpandAtomicStoreInIR(SI)) MadeChange |= expandAtomicStore(SI); } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { MadeChange = true; } else { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(RMWI); AtomicRMWInst::BinOp Op = RMWI->getOperation(); if (ValueSize < MinCASSize && (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || Op == AtomicRMWInst::And)) { RMWI = widenPartwordAtomicRMW(RMWI); MadeChange = true; } MadeChange |= tryExpandAtomicRMW(RMWI); } } else if (CASI) { // TODO: when we're ready to make the change at the IR level, we can // extend convertCmpXchgToInteger for floating point too. 
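// Illustrative sketch, not part of this patch: the identity-element facts
// behind the "expand an idempotent RMW into a load" shortcut taken above
// (isIdempotentRMW/simplifyIdempotentRMW, defined later in this file). When
// the operand is the identity element of the operation, the read-modify-write
// can never change memory, so a fenced atomic load is sufficient.
#include <cstdint>

static_assert((std::uint32_t(0x1234u) + 0u) == 0x1234u, "add 0 is idempotent");
static_assert((std::uint32_t(0x1234u) | 0u) == 0x1234u, "or 0 is idempotent");
static_assert((std::uint32_t(0x1234u) ^ 0u) == 0x1234u, "xor 0 is idempotent");
static_assert((std::uint32_t(0x1234u) & std::uint32_t(-1)) == 0x1234u,
              "and with all-ones is idempotent");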
assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && "unimplemented - floating point not legal at IR level"); if (CASI->getCompareOperand()->getType()->isPointerTy() ) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. CASI = convertCmpXchgToIntegerType(CASI); assert(CASI->getCompareOperand()->getType()->isIntegerTy() && "invariant broken"); MadeChange = true; } MadeChange |= tryExpandAtomicCmpXchg(CASI); } } return MadeChange; } bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { IRBuilder<> Builder(I); auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); // We have a guard here because not every atomic operation generates a // trailing fence. if (TrailingFence) TrailingFence->moveAfter(I); return (LeadingFence || TrailingFence); } /// Get the iX type with the same bitwidth as T. IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, const DataLayout &DL) { EVT VT = TLI->getMemValueType(DL, T); unsigned BitWidth = VT.getStoreSizeInBits(); assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); return IntegerType::get(T->getContext(), BitWidth); } /// Convert an atomic load of a non-integral type to an integer load of the /// equivalent bitwidth. See the function comment on /// convertAtomicStoreToIntegerType for background. LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); IRBuilder<> Builder(LI); Value *Addr = LI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); NewLI->setAlignment(LI->getAlign()); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); LI->replaceAllUsesWith(NewVal); LI->eraseFromParent(); return NewLI; } bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { switch (TLI->shouldExpandAtomicLoadInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: expandAtomicOpToLLSC( LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(), [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); return true; case TargetLoweringBase::AtomicExpansionKind::LLOnly: return expandAtomicLoadToLL(LI); case TargetLoweringBase::AtomicExpansionKind::CmpXChg: return expandAtomicLoadToCmpXchg(LI); default: llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { IRBuilder<> Builder(LI); // On some architectures, load-linked instructions are atomic for larger // sizes than normal loads. For example, the only 64-bit load guaranteed // to be single-copy atomic by ARM is an ldrexd (A3.5.3). 
Value *Val = TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); LI->replaceAllUsesWith(Val); LI->eraseFromParent(); return true; } bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { IRBuilder<> Builder(LI); AtomicOrdering Order = LI->getOrdering(); if (Order == AtomicOrdering::Unordered) Order = AtomicOrdering::Monotonic; Value *Addr = LI->getPointerOperand(); Type *Ty = cast(Addr->getType())->getElementType(); Constant *DummyVal = Constant::getNullValue(Ty); Value *Pair = Builder.CreateAtomicCmpXchg( Addr, DummyVal, DummyVal, Order, AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); LI->replaceAllUsesWith(Loaded); LI->eraseFromParent(); return true; } /// Convert an atomic store of a non-integral type to an integer store of the /// equivalent bitwidth. We used to not support floating point or vector /// atomics in the IR at all. The backends learned to deal with the bitcast /// idiom because that was the only way of expressing the notion of a atomic /// float or vector store. The long term plan is to teach each backend to /// instruction select from the original atomic store, but as a migration /// mechanism, we convert back to the old format which the backends understand. /// Each backend will need individual work to recognize the new format. StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { IRBuilder<> Builder(SI); auto *M = SI->getModule(); Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), M->getDataLayout()); Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); Value *Addr = SI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); NewSI->setAlignment(SI->getAlign()); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; } bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), SI->getOrdering()); SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. return tryExpandAtomicRMW(AI); } static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Value *Loaded, Value *NewVal, AtomicOrdering MemOpOrder, Value *&Success, Value *&NewLoaded) { Type *OrigTy = NewVal->getType(); // This code can go away when cmpxchg supports FP types. 
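// Illustrative sketch, not part of this patch: the floating-point bitcast
// trick used below, expressed in standard C++ (C++20 for std::bit_cast).
// Because cmpxchg only operates on integers and pointers, a float value is
// compared and exchanged through its 32-bit integer representation; note that
// this is a bitwise comparison, exactly like the IR-level cmpxchg (so -0.0
// and NaN payloads are distinguished). Names below are hypothetical.
#include <atomic>
#include <bit>
#include <cstdint>

static bool cmpXchgFloatViaInt(std::atomic<std::uint32_t> &Bits,
                               float &Expected, float Desired) {
  std::uint32_t ExpectedBits = std::bit_cast<std::uint32_t>(Expected);
  bool Success = Bits.compare_exchange_strong(
      ExpectedBits, std::bit_cast<std::uint32_t>(Desired));
  Expected = std::bit_cast<float>(ExpectedBits); // observed old value
  return Success;
}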
bool NeedBitcast = OrigTy->isFloatingPointTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); unsigned AS = Addr->getType()->getPointerAddressSpace(); Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); NewVal = Builder.CreateBitCast(NewVal, IntTy); Loaded = Builder.CreateBitCast(Loaded, IntTy); } Value* Pair = Builder.CreateAtomicCmpXchg( Addr, Loaded, NewVal, MemOpOrder, AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); Success = Builder.CreateExtractValue(Pair, 1, "success"); NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); if (NeedBitcast) NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); } /// Emit IR to implement the given atomicrmw operation on values in registers, /// returning the new value. static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, Value *Loaded, Value *Inc) { Value *NewVal; switch (Op) { case AtomicRMWInst::Xchg: return Inc; case AtomicRMWInst::Add: return Builder.CreateAdd(Loaded, Inc, "new"); case AtomicRMWInst::Sub: return Builder.CreateSub(Loaded, Inc, "new"); case AtomicRMWInst::And: return Builder.CreateAnd(Loaded, Inc, "new"); case AtomicRMWInst::Nand: return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); case AtomicRMWInst::Or: return Builder.CreateOr(Loaded, Inc, "new"); case AtomicRMWInst::Xor: return Builder.CreateXor(Loaded, Inc, "new"); case AtomicRMWInst::Max: NewVal = Builder.CreateICmpSGT(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); case AtomicRMWInst::Min: NewVal = Builder.CreateICmpSLE(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); case AtomicRMWInst::UMax: NewVal = Builder.CreateICmpUGT(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); case AtomicRMWInst::UMin: NewVal = Builder.CreateICmpULE(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); case AtomicRMWInst::FAdd: return Builder.CreateFAdd(Loaded, Inc, "new"); case AtomicRMWInst::FSub: return Builder.CreateFSub(Loaded, Inc, "new"); default: llvm_unreachable("Unknown atomic op"); } } bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { switch (TLI->shouldExpandAtomicRMWInIR(AI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }; expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), AI->getOrdering(), PerformOp); } return true; } case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { // TODO: Handle atomicrmw fadd/fsub if (AI->getType()->isFloatingPointTy()) return false; expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::CmpXChg); } else { expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); } return true; } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { expandAtomicRMWToMaskedIntrinsic(AI); return true; } default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } } namespace { struct PartwordMaskValues { // These three fields are guaranteed to be set by createMaskInstrs. 
Type *WordType = nullptr; Type *ValueType = nullptr; Value *AlignedAddr = nullptr; // The remaining fields can be null. Value *ShiftAmt = nullptr; Value *Mask = nullptr; Value *Inv_Mask = nullptr; }; LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { auto PrintObj = [&O](auto *V) { if (V) O << *V; else O << "nullptr"; O << '\n'; }; O << "PartwordMaskValues {\n"; O << " WordType: "; PrintObj(PMV.WordType); O << " ValueType: "; PrintObj(PMV.ValueType); O << " AlignedAddr: "; PrintObj(PMV.AlignedAddr); O << " ShiftAmt: "; PrintObj(PMV.ShiftAmt); O << " Mask: "; PrintObj(PMV.Mask); O << " Inv_Mask: "; PrintObj(PMV.Inv_Mask); O << "}\n"; return O; } } // end anonymous namespace /// This is a helper function which builds instructions to provide /// values necessary for partword atomic operations. It takes an /// incoming address, Addr, and ValueType, and constructs the address, /// shift-amounts and masks needed to work with a larger value of size /// WordSize. /// /// AlignedAddr: Addr rounded down to a multiple of WordSize /// /// ShiftAmt: Number of bits to right-shift a WordSize value loaded /// from AlignAddr for it to have the same value as if /// ValueType was loaded from Addr. /// /// Mask: Value to mask with the value loaded from AlignAddr to /// include only the part that would've been loaded from Addr. /// /// Inv_Mask: The inverse of Mask. static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, unsigned MinWordSize) { PartwordMaskValues PMV; Module *M = I->getModule(); LLVMContext &Ctx = M->getContext(); const DataLayout &DL = M->getDataLayout(); unsigned ValueSize = DL.getTypeStoreSize(ValueType); PMV.ValueType = ValueType; PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) : ValueType; if (PMV.ValueType == PMV.WordType) { PMV.AlignedAddr = Addr; return PMV; } assert(ValueSize < MinWordSize); Type *WordPtrType = PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); PMV.AlignedAddr = Builder.CreateIntToPtr( Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); } else { // turn bytes into bits, and count from the other side. 
PMV.ShiftAmt = Builder.CreateShl( Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); } PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); PMV.Mask = Builder.CreateShl( ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, "Mask"); PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); return PMV; } static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, const PartwordMaskValues &PMV) { assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); if (PMV.WordType == PMV.ValueType) return WideWord; Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); return Trunc; } static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV) { assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); if (PMV.WordType == PMV.ValueType) return Updated; Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); Value *Shift = Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); Value *Or = Builder.CreateOr(And, Shift, "inserted"); return Or; } /// Emit IR to implement a masked version of a given atomicrmw /// operation. (That is, only the bits under the Mask should be /// affected by the operation) static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV) { // TODO: update to use // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order // to merge bits from two values without requiring PMV.Inv_Mask. switch (Op) { case AtomicRMWInst::Xchg: { Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc); return FinalVal; } case AtomicRMWInst::Or: case AtomicRMWInst::Xor: case AtomicRMWInst::And: llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW"); case AtomicRMWInst::Add: case AtomicRMWInst::Sub: case AtomicRMWInst::Nand: { // The other arithmetic ops need to be masked into place. Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); return FinalVal; } case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: { // Finally, comparison ops will operate on the full value, so // truncate down to the original size, and expand out again after // doing the operation. Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } default: llvm_unreachable("Unknown atomic op"); } } /// Expand a sub-word atomicrmw operation into an appropriate /// word-sized operation. /// /// It will create an LL/SC or cmpxchg loop, as appropriate, the same /// way as a typical atomicrmw expansion. The only difference here is /// that the operation inside of the loop may operate upon only a /// part of the value. 
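// Illustrative sketch, not part of this patch: the mask/shift arithmetic that
// the part-word expansion relies on, written out on plain integers. For an i8
// located at byte offset 1 of a little-endian 4-byte word, ShiftAmt is 8 and
// Mask is 0xFF00; extractMaskedValue and insertMaskedValue then reduce to the
// two hypothetical helpers below.
#include <cstdint>

static std::uint8_t extractByte(std::uint32_t WideWord, unsigned ShiftAmt) {
  // Same shape as extractMaskedValue: shift down, then truncate.
  return std::uint8_t(WideWord >> ShiftAmt);
}

static std::uint32_t insertByte(std::uint32_t WideWord, std::uint8_t Updated,
                                unsigned ShiftAmt) {
  std::uint32_t Mask = 0xFFu << ShiftAmt;              // bits covered by the i8
  std::uint32_t Shifted = std::uint32_t(Updated) << ShiftAmt;
  // Same shape as insertMaskedValue: clear the slot, then OR in the new bits.
  return (WideWord & ~Mask) | Shifted;
}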
void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { AtomicOrdering MemOpOrder = AI->getOrdering(); IRBuilder<> Builder(AI); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), PMV.ShiftAmt, "ValOperand_Shifted"); auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) { return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded, ValOperand_Shifted, AI->getValOperand(), PMV); }; Value *OldResult; if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, PerformPartwordOp, createCmpXchgInstFun); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, PerformPartwordOp); } Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { IRBuilder<> Builder(AI); AtomicRMWInst::BinOp Op = AI->getOperation(); assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || Op == AtomicRMWInst::And) && "Unable to widen operation"); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), PMV.ShiftAmt, "ValOperand_Shifted"); Value *NewOperand; if (Op == AtomicRMWInst::And) NewOperand = Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); else NewOperand = ValOperand_Shifted; AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, AI->getOrdering()); Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); return NewAI; } bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // The basic idea here is that we're expanding a cmpxchg of a // smaller memory size up to a word-sized cmpxchg. To do this, we // need to add a retry-loop for strong cmpxchg, so that // modifications to other parts of the word don't cause a spurious // failure. 
// This generates code like the following: // [[Setup mask values PMV.*]] // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt // %InitLoaded = load i32* %addr // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask // br partword.cmpxchg.loop // partword.cmpxchg.loop: // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ], // [ %OldVal_MaskOut, %partword.cmpxchg.failure ] // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp, // i32 %FullWord_NewVal success_ordering failure_ordering // %OldVal = extractvalue { i32, i1 } %NewCI, 0 // %Success = extractvalue { i32, i1 } %NewCI, 1 // br i1 %Success, label %partword.cmpxchg.end, // label %partword.cmpxchg.failure // partword.cmpxchg.failure: // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut // br i1 %ShouldContinue, label %partword.cmpxchg.loop, // label %partword.cmpxchg.end // partword.cmpxchg.end: // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt // %FinalOldVal = trunc i32 %tmp1 to i8 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1 Value *Addr = CI->getPointerOperand(); Value *Cmp = CI->getCompareOperand(); Value *NewVal = CI->getNewValOperand(); BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); IRBuilder<> Builder(CI); LLVMContext &Ctx = Builder.getContext(); const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8; BasicBlock *EndBB = BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB); auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB); // The split call above "helpfully" added a branch at the end of BB // (to the wrong place). std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); PartwordMaskValues PMV = createMaskInstrs( Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize); // Shift the incoming values over, into the right location in the word. Value *NewVal_Shifted = Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); Value *Cmp_Shifted = Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt); // Load the entire current word, and mask into place the expected and new // values LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr); InitLoaded->setVolatile(CI->isVolatile()); Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask); Builder.CreateBr(LoopBB); // partword.cmpxchg.loop: Builder.SetInsertPoint(LoopBB); PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2); Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB); // Mask/Or the expected and new values into place in the loaded word. Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. 
But, using strong // allows the below comparison for ShouldContinue, and we're // expecting the underlying cmpxchg to be a machine instruction, // which is strong anyways. NewCI->setWeak(CI->isWeak()); Value *OldVal = Builder.CreateExtractValue(NewCI, 0); Value *Success = Builder.CreateExtractValue(NewCI, 1); if (CI->isWeak()) Builder.CreateBr(EndBB); else Builder.CreateCondBr(Success, EndBB, FailureBB); // partword.cmpxchg.failure: Builder.SetInsertPoint(FailureBB); // Upon failure, verify that the masked-out part of the loaded value // has been modified. If it didn't, abort the cmpxchg, since the // masked-in part must've. Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask); Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut); Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB); // Add the second value to the phi from above Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB); // partword.cmpxchg.end: Builder.SetInsertPoint(CI); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); CI->eraseFromParent(); return true; } void AtomicExpand::expandAtomicOpToLLSC( Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp) { IRBuilder<> Builder(I); Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp); I->replaceAllUsesWith(Loaded); I->eraseFromParent(); } void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { IRBuilder<> Builder(AI); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), TLI->getMinCmpXchgSizeInBits() / 8); // The value operand must be sign-extended for signed min/max so that the // target's signed comparison instructions can be used. Otherwise, just // zero-ext. 
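// Illustrative sketch, not part of this patch: why sign-extension matters for
// the signed min/max cases chosen just below. Zero-extending int8_t(-1) into
// the wider word yields 255, which a signed comparison would treat as the
// maximum; sign-extending preserves the value -1 that the original i8
// operation expects.
#include <cstdint>

static_assert(std::int32_t(std::uint8_t(std::int8_t(-1))) == 255,
              "zero-extension loses the sign");
static_assert(std::int32_t(std::int8_t(-1)) == -1,
              "sign-extension preserves the sign");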
Instruction::CastOps CastOp = Instruction::ZExt; AtomicRMWInst::BinOp RMWOp = AI->getOperation(); if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) CastOp = Instruction::SExt; Value *ValOperand_Shifted = Builder.CreateShl( Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), PMV.ShiftAmt, "ValOperand_Shifted"); Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, AI->getOrdering()); Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { IRBuilder<> Builder(CI); PartwordMaskValues PMV = createMaskInstrs( Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), TLI->getMinCmpXchgSizeInBits() / 8); Value *CmpVal_Shifted = Builder.CreateShl( Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, "CmpVal_Shifted"); Value *NewVal_Shifted = Builder.CreateShl( Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, "NewVal_Shifted"); Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, CI->getSuccessOrdering()); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Value *Success = Builder.CreateICmpEQ( CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); CI->eraseFromParent(); } Value *AtomicExpand::insertRMWLLSCLoop( IRBuilder<> &Builder, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] // atomicrmw.start: // %loaded = @load.linked(%addr) // %new = some_op iN %loaded, %incr // %stored = @store_conditional(%new, %addr) // %try_again = icmp i32 ne %stored, 0 // br i1 %try_again, label %loop, label %atomicrmw.end // atomicrmw.end: // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place). std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); Value *NewVal = PerformOp(Builder, Loaded); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Loaded; } /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of /// the equivalent bitwidth. We used to not support pointer cmpxchg in the /// IR. As a migration step, we convert back to what use to be the standard /// way to represent a pointer cmpxchg so that we can update backends one by /// one. 
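// Illustrative sketch, not part of this patch: the pointer-to-integer cmpxchg
// conversion performed below, expressed with standard atomics. The pass does
// the same thing at the IR level with ptrtoint/inttoptr around an integer
// cmpxchg of pointer width. Names below are hypothetical.
#include <atomic>
#include <cstdint>

static bool cmpXchgPointerViaInt(std::atomic<std::uintptr_t> &Slot,
                                 void *&Expected, void *Desired) {
  auto ExpectedBits = reinterpret_cast<std::uintptr_t>(Expected);
  auto DesiredBits = reinterpret_cast<std::uintptr_t>(Desired);
  bool Success = Slot.compare_exchange_strong(ExpectedBits, DesiredBits);
  Expected = reinterpret_cast<void *>(ExpectedBits); // observed old value
  return Success;
}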
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { auto *M = CI->getModule(); Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); IRBuilder<> Builder(CI); Value *Addr = CI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); Value *OldVal = Builder.CreateExtractValue(NewCI, 0); Value *Succ = Builder.CreateExtractValue(NewCI, 1); OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType()); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, OldVal, 0); Res = Builder.CreateInsertValue(Res, Succ, 1); CI->replaceAllUsesWith(Res); CI->eraseFromParent(); return NewCI; } bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); // If shouldInsertFencesForAtomic() returns true, then the target does not // want to deal with memory orders, and emitLeading/TrailingFence should take // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder; // In implementations which use a barrier to achieve release semantics, we can // delay emitting this barrier until we know a store is actually going to be // attempted. The cost of this delay is that we need 2 copies of the block // emitting the load-linked, affecting code size. // // Ideally, this logic would be unconditional except for the minsize check // since in other cases the extra blocks naturally collapse down to the // minimal loop. Unfortunately, this puts too much stress on later // optimisations so we avoid emitting the extra logic in those cases too. bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && SuccessOrder != AtomicOrdering::Monotonic && SuccessOrder != AtomicOrdering::Acquire && !F->hasMinSize(); // There's no overhead for sinking the release barrier in a weak cmpxchg, so // do it even on minsize. bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // // The full expansion we produce is: // [...] // %aligned.addr = ... // cmpxchg.start: // %unreleasedload = @load.linked(%aligned.addr) // %unreleasedload.extract = extract value from %unreleasedload // %should_store = icmp eq %unreleasedload.extract, %desired // br i1 %should_store, label %cmpxchg.releasingstore, // label %cmpxchg.nostore // cmpxchg.releasingstore: // fence? 
// br label cmpxchg.trystore // cmpxchg.trystore: // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], // [%releasedload, %cmpxchg.releasedload] // %updated.new = insert %new into %loaded.trystore // %stored = @store_conditional(%updated.new, %aligned.addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, // label %cmpxchg.releasedload/%cmpxchg.failure // cmpxchg.releasedload: // %releasedload = @load.linked(%aligned.addr) // %releasedload.extract = extract value from %releasedload // %should_store = icmp eq %releasedload.extract, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.success: // fence? // br label %cmpxchg.end // cmpxchg.nostore: // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], // [%releasedload, // %cmpxchg.releasedload/%cmpxchg.trystore] // @load_linked_fail_balance()? // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end // cmpxchg.end: // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], // [%loaded.trystore, %cmpxchg.trystore] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] // %loaded = extract value from %loaded.exit // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto ReleasedLoadBB = BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); auto ReleasingStoreBB = BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); // This grabs the DebugLoc from CI IRBuilder<> Builder(CI); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); PartwordMaskValues PMV = createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); Value *UnreleasedLoadExtract = extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). 
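// Illustrative sketch, not part of this patch: the address used by the
// widened load-linked/store-conditional pair in this expansion. For a
// sub-word cmpxchg the value actually exchanged is a full word, so the
// exclusive accesses must target the word-aligned address computed by
// createMaskInstrs (PMV.AlignedAddr), i.e. the element address rounded down
// to a multiple of the word size. The helper below is hypothetical and
// assumes WordSize is a power of two.
#include <cstdint>

static std::uintptr_t alignDownToWord(std::uintptr_t Addr,
                                      std::uintptr_t WordSize) {
  return Addr & ~(WordSize - 1); // same computation as PMV.AlignedAddr
}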
Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); Builder.SetInsertPoint(ReleasingStoreBB); if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); PHINode *LoadedTryStore = Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); + TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, + MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; Builder.CreateCondBr(StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB); Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); // Update PHI node in TryStoreBB. LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else Builder.CreateUnreachable(); // Make sure later instructions don't get reordered with a fence if // necessary. Builder.SetInsertPoint(SuccessBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, SuccessOrder); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); PHINode *LoadedNoStore = Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); if (HasReleasedLoadBB) LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); // In the failing case, where we don't execute the store-conditional, the // target might want to balance out the load-linked with a dedicated // instruction (e.g., on ARM, clearing the exclusive monitor). TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); PHINode *LoadedFailure = Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); if (CI->isWeak()) LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg // succeeded or not. We expose this to later passes by converting any // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); PHINode *LoadedExit = Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); LoadedExit->addIncoming(LoadedTryStore, SuccessBB); LoadedExit->addIncoming(LoadedFailure, FailureBB); PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); // This is the "exit value" from the cmpxchg expansion. It may be of // a type wider than the one in the cmpxchg instruction. 
Value *LoadedFull = LoadedExit; Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. SmallVector PrunedInsts; for (auto User : CI->users()) { ExtractValueInst *EV = dyn_cast(User); if (!EV) continue; assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && "weird extraction from { iN, i1 }"); if (EV->getIndices()[0] == 0) EV->replaceAllUsesWith(Loaded); else EV->replaceAllUsesWith(Success); PrunedInsts.push_back(EV); } // We can remove the instructions now we're no longer iterating through them. for (auto EV : PrunedInsts) EV->eraseFromParent(); if (!CI->use_empty()) { // Some use of the full struct return that we don't understand has happened, // so we've got to reconstruct it properly. Value *Res; Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); } CI->eraseFromParent(); return true; } bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { auto C = dyn_cast(RMWI->getValOperand()); if(!C) return false; AtomicRMWInst::BinOp Op = RMWI->getOperation(); switch(Op) { case AtomicRMWInst::Add: case AtomicRMWInst::Sub: case AtomicRMWInst::Or: case AtomicRMWInst::Xor: return C->isZero(); case AtomicRMWInst::And: return C->isMinusOne(); // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... default: return false; } } bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { tryExpandAtomicLoad(ResultingLoad); return true; } return false; } Value *AtomicExpand::insertRMWCmpXchgLoop( IRBuilder<> &Builder, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, function_ref &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] // %init_loaded = load atomic iN* %addr // br label %loop // loop: // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] // %new = some_op iN %loaded, %incr // %pair = cmpxchg iN* %addr, iN %loaded, iN %new // %new_loaded = extractvalue { iN, i1 } %pair, 0 // %success = extractvalue { iN, i1 } %pair, 1 // br i1 %success, label %atomicrmw.end, label %loop // atomicrmw.end: // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); // Atomics require at least natural alignment. InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8)); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. 
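// Illustrative sketch, not part of this patch: the cmpxchg loop that
// insertRMWCmpXchgLoop emits, written with standard atomics for an 'add'.
// The loaded value is carried around the loop (the PHI in the IR comment
// above) until the compare-exchange succeeds, and the old value is returned.
// Names below are hypothetical.
#include <atomic>
#include <cstdint>

static std::uint32_t fetchAddViaCmpXchgLoop(std::atomic<std::uint32_t> &Addr,
                                            std::uint32_t Incr) {
  std::uint32_t Loaded = Addr.load(); // %init_loaded
  std::uint32_t NewVal;
  do {
    NewVal = Loaded + Incr; // some_op
    // On failure, compare_exchange_strong reloads 'Loaded' with the value
    // currently in memory, matching the %new_loaded back-edge of the PHI.
  } while (!Addr.compare_exchange_strong(Loaded, NewVal));
  return Loaded; // old value, like the atomicrmw result
}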
Builder.SetInsertPoint(LoopBB); PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded"); Loaded->addIncoming(InitLoaded, BB); Value *NewVal = PerformOp(Builder, Loaded); Value *NewLoaded = nullptr; Value *Success = nullptr; CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder == AtomicOrdering::Unordered ? AtomicOrdering::Monotonic : MemOpOrder, Success, NewLoaded); assert(Success && NewLoaded); Loaded->addIncoming(NewLoaded, LoopBB); Builder.CreateCondBr(Success, ExitBB, LoopBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return NewLoaded; } bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(CI); switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { default: llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); case TargetLoweringBase::AtomicExpansionKind::None: if (ValueSize < MinCASSize) return expandPartwordCmpXchg(CI); return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: expandAtomicCmpXchgToMaskedIntrinsic(CI); return true; } } // Note: This function is exposed externally by AtomicExpandUtils.h bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { IRBuilder<> Builder(AI); Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(), [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }, CreateCmpXchg); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return true; } // In order to use one of the sized library calls such as // __atomic_fetch_add_4, the alignment must be sufficient, the size // must be one of the potentially-specialized sizes, and the value // type must actually exist in C on the target (otherwise, the // function wouldn't actually be defined.) static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL) { // TODO: "LargestSize" is an approximation for "largest type that // you can express in C". It seems to be the case that int128 is // supported on all 64-bit platforms, otherwise only up to 64-bit // integers are supported. If we get this wrong, then we'll try to // call a sized libcall that doesn't actually exist. There should // really be some more reliable way in LLVM of determining integer // sizes which are valid in the target's C ABI... unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 
16 : 8; return Alignment >= Size && (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && Size <= LargestSize; } void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; unsigned Size = getAtomicOpSize(I); bool expanded = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); (void)expanded; assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load"); } void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; unsigned Size = getAtomicOpSize(I); bool expanded = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); (void)expanded; assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store"); } void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; unsigned Size = getAtomicOpSize(I); bool expanded = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), Libcalls); (void)expanded; assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS"); } static ArrayRef GetRMWLibcall(AtomicRMWInst::BinOp Op) { static const RTLIB::Libcall LibcallsXchg[6] = { RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1, RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4, RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16}; static const RTLIB::Libcall LibcallsAdd[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1, RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4, RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16}; static const RTLIB::Libcall LibcallsSub[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1, RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4, RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16}; static const RTLIB::Libcall LibcallsAnd[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1, RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4, RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16}; static const RTLIB::Libcall LibcallsOr[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1, RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4, RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16}; static const RTLIB::Libcall LibcallsXor[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1, RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4, RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16}; static const RTLIB::Libcall LibcallsNand[6] = { RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1, RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4, RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16}; switch (Op) { case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Should not have BAD_BINOP."); case AtomicRMWInst::Xchg: return makeArrayRef(LibcallsXchg); case AtomicRMWInst::Add: return
makeArrayRef(LibcallsAdd); case AtomicRMWInst::Sub: return makeArrayRef(LibcallsSub); case AtomicRMWInst::And: return makeArrayRef(LibcallsAnd); case AtomicRMWInst::Or: return makeArrayRef(LibcallsOr); case AtomicRMWInst::Xor: return makeArrayRef(LibcallsXor); case AtomicRMWInst::Nand: return makeArrayRef(LibcallsNand); case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. return {}; } llvm_unreachable("Unexpected AtomicRMW operation."); } void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { ArrayRef Libcalls = GetRMWLibcall(I->getOperation()); unsigned Size = getAtomicOpSize(I); bool Success = false; if (!Libcalls.empty()) Success = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(), nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); // The expansion failed: either there were no libcalls at all for // the operation (min/max), or there were only size-specialized // libcalls (add/sub/etc) and we needed a generic. So, expand to a // CAS libcall, via a CAS loop, instead. if (!Success) { expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded, Value *NewVal, AtomicOrdering MemOpOrder, Value *&Success, Value *&NewLoaded) { // Create the CAS instruction normally... AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( Addr, Loaded, NewVal, MemOpOrder, AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); Success = Builder.CreateExtractValue(Pair, 1, "success"); NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); // ...and then expand the CAS into a libcall. expandAtomicCASToLibcall(Pair); }); } } // A helper routine for the above expandAtomic*ToLibcall functions. // // 'Libcalls' contains an array of enum values for the particular // ATOMIC libcalls to be emitted. All of the other arguments besides // 'I' are extracted from the Instruction subclass by the // caller. Depending on the particular call, some will be null. bool AtomicExpand::expandAtomicOpToLibcall( Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, AtomicOrdering Ordering2, ArrayRef Libcalls) { assert(Libcalls.size() == 6); LLVMContext &Ctx = I->getContext(); Module *M = I->getModule(); const DataLayout &DL = M->getDataLayout(); IRBuilder<> Builder(I); IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); // TODO: the "order" argument type is "int", not int32. So // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. 
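// An illustrative sketch, separate from the patched code: operations such as
// max/min have no __atomic_fetch_* libcall, so expandAtomicRMWToLibcall above
// falls back to a compare-exchange loop. The same idea in portable C++, with
// hypothetical names:
#include <algorithm>
#include <atomic>
#include <cstdint>

int32_t atomic_fetch_max_sketch(std::atomic<int32_t> &Addr, int32_t Val) {
  int32_t Loaded = Addr.load();
  // On failure, Loaded is refreshed and the maximum candidate is recomputed.
  while (!Addr.compare_exchange_weak(Loaded, std::max(Loaded, Val))) {
  }
  return Loaded;  // old value, as an atomicrmw max would return
}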
ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO"); Constant *OrderingVal = ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); Constant *Ordering2Val = nullptr; if (CASExpected) { assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO"); Ordering2Val = ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2)); } bool HasResult = I->getType() != Type::getVoidTy(Ctx); RTLIB::Libcall RTLibType; if (UseSizedLibcall) { switch (Size) { case 1: RTLibType = Libcalls[1]; break; case 2: RTLibType = Libcalls[2]; break; case 4: RTLibType = Libcalls[3]; break; case 8: RTLibType = Libcalls[4]; break; case 16: RTLibType = Libcalls[5]; break; } } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { RTLibType = Libcalls[0]; } else { // Can't use sized function, and there's no generic for this // operation, so give up. return false; } // Build up the function call. There's two kinds. First, the sized // variants. These calls are going to be one of the following (with // N=1,2,4,8,16): // iN __atomic_load_N(iN *ptr, int ordering) // void __atomic_store_N(iN *ptr, iN val, int ordering) // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering) // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, // int success_order, int failure_order) // // Note that these functions can be used for non-integer atomic // operations, the values just need to be bitcast to integers on the // way in and out. // // And, then, the generic variants. They look like the following: // void __atomic_load(size_t size, void *ptr, void *ret, int ordering) // void __atomic_store(size_t size, void *ptr, void *val, int ordering) // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, // int ordering) // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, // void *desired, int success_order, // int failure_order) // // The different signatures are built up depending on the // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult' // variables. AllocaInst *AllocaCASExpected = nullptr; Value *AllocaCASExpected_i8 = nullptr; AllocaInst *AllocaValue = nullptr; Value *AllocaValue_i8 = nullptr; AllocaInst *AllocaResult = nullptr; Value *AllocaResult_i8 = nullptr; Type *ResultTy; SmallVector Args; AttributeList Attr; // 'size' argument. if (!UseSizedLibcall) { // Note, getIntPtrType is assumed equivalent to size_t. Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size)); } // 'ptr' argument. // note: This assumes all address spaces share a common libfunc // implementation and that addresses are convertable. For systems without // that property, we'd need to extend this mechanism to support AS-specific // families of atomic intrinsics. auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); Value *PtrVal = Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS)); PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); Args.push_back(PtrVal); // 'expected' argument, if present. 
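// An illustrative sketch, separate from the patched code: the sized signatures
// listed in the comment above are what the GCC/Clang __atomic builtins fall
// back to when the target cannot inline the operation. A naturally aligned
// 4-byte object takes the sized path; the function name is hypothetical and
// the exact lowering depends on the target.
#include <cstdint>

uint32_t sized_path_sketch(uint32_t *Ptr) {
  // May lower to an inline instruction or to a call such as
  // __atomic_fetch_add_4(ptr, val, order), per the signatures above.
  return __atomic_fetch_add(Ptr, 1u, __ATOMIC_SEQ_CST);
}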
if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); AllocaCASExpected->setAlignment(AllocaAlignment); unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); AllocaCASExpected_i8 = Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); Args.push_back(AllocaCASExpected_i8); } // 'val' argument ('desired' for cas), if present. if (ValueOperand) { if (UseSizedLibcall) { Value *IntValue = Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); AllocaValue->setAlignment(AllocaAlignment); AllocaValue_i8 = Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); Args.push_back(AllocaValue_i8); } } // 'ret' argument. if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); AllocaResult->setAlignment(AllocaAlignment); unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); Args.push_back(AllocaResult_i8); } // 'ordering' ('success_order' for cas) argument. Args.push_back(OrderingVal); // 'failure_order' argument, if present. if (Ordering2Val) Args.push_back(Ordering2Val); // Now, the return type. if (CASExpected) { ResultTy = Type::getInt1Ty(Ctx); Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); } else if (HasResult && UseSizedLibcall) ResultTy = SizedIntTy; else ResultTy = Type::getVoidTy(Ctx); // Done with setting up arguments and return types, create the call: SmallVector ArgTys; for (Value *Arg : Args) ArgTys.push_back(Arg->getType()); FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); FunctionCallee LibcallFn = M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); CallInst *Call = Builder.CreateCall(LibcallFn, Args); Call->setAttributes(Attr); Value *Result = Call; // And then, extract the results... 
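// An illustrative sketch, separate from the patched code: an object too large
// for a sized call takes the generic path, where expected/desired/result are
// passed through memory much as the allocas above arrange. The struct size is
// chosen only to force the generic form; names are hypothetical.
struct Blob { char Bytes[32]; };

bool generic_path_sketch(Blob *Ptr, Blob *Expected, Blob *Desired) {
  // Typically becomes a call to the generic libcall listed above:
  //   bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success, int failure)
  return __atomic_compare_exchange(Ptr, Expected, Desired, /*weak=*/false,
                                   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}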
if (ValueOperand && !UseSizedLibcall) Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); if (CASExpected) { // The final result from the CAS is {load of 'expected' alloca, bool result // from call} Type *FinalResultTy = I->getType(); Value *V = UndefValue::get(FinalResultTy); Value *ExpectedOut = Builder.CreateAlignedLoad( CASExpected->getType(), AllocaCASExpected, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); V = Builder.CreateInsertValue(V, ExpectedOut, 0); V = Builder.CreateInsertValue(V, Result, 1); I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; if (UseSizedLibcall) V = Builder.CreateBitOrPointerCast(Result, I->getType()); else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); } I->replaceAllUsesWith(V); } I->eraseFromParent(); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 2ce1d414e755..1e2a82615da8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -1,667 +1,671 @@ //===-- lib/CodeGen/GlobalISel/InlineAsmLowering.cpp ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements the lowering from LLVM IR inline asm to MIR INLINEASM /// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "inline-asm-lowering" using namespace llvm; void InlineAsmLowering::anchor() {} namespace { /// GISelAsmOperandInfo - This contains information for each constraint that we /// are lowering. class GISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// Regs - If this is a register or register class operand, this /// contains the set of assigned registers corresponding to the operand. SmallVector Regs; explicit GISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &Info) : TargetLowering::AsmOperandInfo(Info) {} }; using GISelAsmOperandInfoVector = SmallVector; class ExtraFlags { unsigned Flags = 0; public: explicit ExtraFlags(const CallBase &CB) { const InlineAsm *IA = cast(CB.getCalledOperand()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) Flags |= InlineAsm::Extra_IsAlignStack; if (CB.isConvergent()) Flags |= InlineAsm::Extra_IsConvergent; Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } void update(const TargetLowering::AsmOperandInfo &OpInfo) { // Ideally, we would only check against memory constraints. However, the // meaning of an Other constraint can be target-specific and we can't easily // reason about it. 
Therefore, be conservative and set MayLoad/MayStore // for Other constraints as well. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Other) { if (OpInfo.Type == InlineAsm::isInput) Flags |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) Flags |= InlineAsm::Extra_MayStore; else if (OpInfo.Type == InlineAsm::isClobber) Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } unsigned get() const { return Flags; } }; } // namespace /// Assign virtual/physical registers for the specified register operand. static void getRegistersForValue(MachineFunction &MF, MachineIRBuilder &MIRBuilder, GISelAsmOperandInfo &OpInfo, GISelAsmOperandInfo &RefOpInfo) { const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // No work to do for memory operations. if (OpInfo.ConstraintType == TargetLowering::C_Memory) return; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. Register AssignedReg; const TargetRegisterClass *RC; std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) return; // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) return; // Initialize NumRegs. unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) NumRegs = TLI.getNumRegisters(MF.getFunction().getContext(), OpInfo.ConstraintVT); // If this is a constraint for a specific physical register, but the type of // the operand requires more than one register to be passed, we allocate the // required amount of physical registers, starting from the selected physical // register. // For this, first retrieve a register iterator for the given register class TargetRegisterClass::iterator I = RC->begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Advance the iterator to the assigned register (if set) if (AssignedReg) { for (; *I != AssignedReg; ++I) assert(I != RC->end() && "AssignedReg should be a member of provided RC"); } // Finally, assign the registers. If the AssignedReg isn't set, create virtual // registers with the provided register class for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); OpInfo.Regs.push_back(R); } } /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; case TargetLowering::C_Register: return 1; case TargetLowering::C_RegisterClass: return 2; case TargetLowering::C_Memory: return 3; } llvm_unreachable("Invalid constraint type"); } static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, const TargetLowering *TLI) { assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); unsigned BestIdx = 0; TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; int BestGenerality = -1; // Loop over the options, keeping track of the most general one. 
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { TargetLowering::ConstraintType CType = TLI->getConstraintType(OpInfo.Codes[i]); // Indirect 'other' or 'immediate' constraints are not allowed. if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || CType == TargetLowering::C_Register || CType == TargetLowering::C_RegisterClass)) continue; // If this is an 'other' or 'immediate' constraint, see if the operand is // valid for it. For example, on X86 we might have an 'rI' constraint. If // the operand is an integer in the range [0..31] we want to use I (saving a // load of a register), otherwise we must use 'r'. if (CType == TargetLowering::C_Other || CType == TargetLowering::C_Immediate) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); // FIXME: prefer immediate constraints if the target allows it } // Things with matching constraints can only be registers, per gcc // documentation. This mainly affects "g" constraints. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) continue; // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { BestType = CType; BestIdx = i; BestGenerality = Generality; } } OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; OpInfo.ConstraintType = BestType; } static void computeConstraintToUse(const TargetLowering *TLI, TargetLowering::AsmOperandInfo &OpInfo) { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); // Single-letter constraints ('r') are very common. if (OpInfo.Codes.size() == 1) { OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); } else { chooseConstraint(OpInfo, TLI); } // 'X' matches anything. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing // that matches labels). For Functions, the type here is the type of // the result, which is not what we want to look at; leave them alone. Value *Val = OpInfo.CallOperandVal; if (isa(Val) || isa(Val) || isa(Val)) return; // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. if (const char *Repl = TLI->LowerXConstraint(OpInfo.ConstraintVT)) { OpInfo.ConstraintCode = Repl; OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); } } } static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) { unsigned Flag = I.getOperand(OpIdx).getImm(); return InlineAsm::getNumOperandRegisters(Flag); } static bool buildAnyextOrCopy(Register Dst, Register Src, MachineIRBuilder &MIRBuilder) { const TargetRegisterInfo *TRI = MIRBuilder.getMF().getSubtarget().getRegisterInfo(); MachineRegisterInfo *MRI = MIRBuilder.getMRI(); auto SrcTy = MRI->getType(Src); if (!SrcTy.isValid()) { LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n"); return false; } unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI); unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI); if (DstSize < SrcSize) { LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n"); return false; } // Attempt to anyext small scalar sources. 
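// An illustrative sketch, separate from the patched code: a multi-alternative
// constraint such as "ri" is what chooseConstraint has to arbitrate between,
// as the comment above describes for 'rI' on X86. Assumes an x86-64 target and
// GNU inline-asm syntax; names are hypothetical.
long add_with_multi_constraint(long X, long Y) {
  // "ri" offers both 'r' (register) and 'i' (immediate) alternatives.
  asm("addq %1, %0" : "+r"(X) : "ri"(Y));
  return X;
}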
if (DstSize > SrcSize) { if (!SrcTy.isScalar()) { LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of" "destination register class\n"); return false; } Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0); } MIRBuilder.buildCopy(Dst, Src); return true; } bool InlineAsmLowering::lowerInlineAsm( MachineIRBuilder &MIRBuilder, const CallBase &Call, std::function(const Value &Val)> GetOrCreateVRegs) const { const InlineAsm *IA = cast(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. GISelAsmOperandInfoVector ConstraintOperands; MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineRegisterInfo *MRI = MIRBuilder.getMRI(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(DL, TRI, Call); ExtraFlags ExtraInfo(Call); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (auto &T : TargetConstraints) { ConstraintOperands.push_back(GISelAsmOperandInfo(T)); GISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); // Compute the value type for each operand. if (OpInfo.Type == InlineAsm::isInput || (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { OpInfo.CallOperandVal = const_cast(Call.getArgOperand(ArgNo++)); if (isa(OpInfo.CallOperandVal)) { LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n"); return false; } Type *OpTy = OpInfo.CallOperandVal->getType(); // If this is an indirect operand, the operand is a pointer to the // accessed type. if (OpInfo.isIndirect) { PointerType *PtrTy = dyn_cast(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } // FIXME: Support aggregate input operands if (!OpTy->isSingleValueType()) { LLVM_DEBUG( dbgs() << "Aggregate input operands are not supported yet\n"); return false; } OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT(); } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(Call.getType())) { OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType()); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } // Compute the constraint code and ConstraintType to use. computeConstraintToUse(TLI, OpInfo); // The selected constraint type might expose new sideeffects ExtraInfo.update(OpInfo); } // At this point, all operand types are decided. // Create the MachineInstr, but don't insert it yet since input // operands still need to insert instructions before this one auto Inst = MIRBuilder.buildInstrNoInsert(TargetOpcode::INLINEASM) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo.get()); // Starting from this operand: flag followed by register(s) will be added as // operands to Inst for each constraint. Used for matching input constraints. unsigned StartIdx = Inst->getNumOperands(); // Collects the output operands for later processing GISelAsmOperandInfoVector OutputOperands; for (auto &OpInfo : ConstraintOperands) { GISelAsmOperandInfo &RefOpInfo = OpInfo.isMatchingInputConstraint() ? 
ConstraintOperands[OpInfo.getMatchedOperand()] : OpInfo; // Assign registers for register operands getRegistersForValue(MF, MIRBuilder, OpInfo, RefOpInfo); switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { unsigned ConstraintID = TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM instruction to know about this // output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); Inst.addImm(OpFlags); ArrayRef SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert( SourceRegs.size() == 1 && "Expected the memory output to fit into a single virtual register"); Inst.addReg(SourceRegs[0]); } else { // Otherwise, this outputs to a register (directly for C_Register / // C_RegisterClass. Find a register that we can use. assert(OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass); if (OpInfo.Regs.empty()) { LLVM_DEBUG(dbgs() << "Couldn't allocate output register for constraint\n"); return false; } // Add information to the INLINEASM instruction to know that this // register is set. unsigned Flag = InlineAsm::getFlagWord( OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, OpInfo.Regs.size()); if (OpInfo.Regs.front().isVirtual()) { // Put the register class of the virtual registers in the flag word. // That way, later passes can recompute register class constraints for // inline assembly as well as normal instructions. Don't do this for // tied operands that can use the regclass information from the def. const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } Inst.addImm(Flag); for (Register Reg : OpInfo.Regs) { Inst.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()) | (OpInfo.isEarlyClobber ? RegState::EarlyClobber : 0)); } // Remember this output operand for later processing OutputOperands.push_back(OpInfo); } break; case InlineAsm::isInput: { if (OpInfo.isMatchingInputConstraint()) { unsigned DefIdx = OpInfo.getMatchedOperand(); // Find operand with register def that corresponds to DefIdx. unsigned InstFlagIdx = StartIdx; for (unsigned i = 0; i < DefIdx; ++i) InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1; assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag"); unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm(); if (InlineAsm::isMemKind(MatchedOperandFlag)) { LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not " "supported. This should be target specific.\n"); return false; } if (!InlineAsm::isRegDefKind(MatchedOperandFlag) && !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) { LLVM_DEBUG(dbgs() << "Unknown matching constraint\n"); return false; } // We want to tie input to register in next operand. 
unsigned DefRegIdx = InstFlagIdx + 1; Register Def = Inst->getOperand(DefRegIdx).getReg(); - // Copy input to new vreg with same reg class as Def - const TargetRegisterClass *RC = MRI->getRegClass(Def); ArrayRef SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert(SrcRegs.size() == 1 && "Single register is expected here"); - Register Tmp = MRI->createVirtualRegister(RC); - if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder)) - return false; - // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def. + // When Def is physreg: use given input. + Register In = SrcRegs[0]; + // When Def is vreg: copy input to new vreg with same reg class as Def. + if (Def.isVirtual()) { + In = MRI->createVirtualRegister(MRI->getRegClass(Def)); + if (!buildAnyextOrCopy(In, SrcRegs[0], MIRBuilder)) + return false; + } + + // Add Flag and input register operand (In) to Inst. Tie In to Def. unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx); Inst.addImm(Flag); - Inst.addReg(Tmp); + Inst.addReg(In); Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1); break; } if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) { LLVM_DEBUG(dbgs() << "Indirect input operands with unknown constraint " "not supported yet\n"); return false; } if (OpInfo.ConstraintType == TargetLowering::C_Immediate || OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; if (!lowerAsmOperandForConstraint(OpInfo.CallOperandVal, OpInfo.ConstraintCode, Ops, MIRBuilder)) { LLVM_DEBUG(dbgs() << "Don't support constraint: " << OpInfo.ConstraintCode << " yet\n"); return false; } assert(Ops.size() > 0 && "Expected constraint to be lowered to at least one operand"); // Add information to the INLINEASM node to know about this input. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); Inst.addImm(OpFlags); Inst.add(Ops); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { if (!OpInfo.isIndirect) { LLVM_DEBUG(dbgs() << "Cannot indirectify memory input operands yet\n"); return false; } assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); unsigned ConstraintID = TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); Inst.addImm(OpFlags); ArrayRef SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert( SourceRegs.size() == 1 && "Expected the memory input to fit into a single virtual register"); Inst.addReg(SourceRegs[0]); break; } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); if (OpInfo.isIndirect) { LLVM_DEBUG(dbgs() << "Can't handle indirect register inputs yet " "for constraint '" << OpInfo.ConstraintCode << "'\n"); return false; } // Copy the input into the appropriate registers. 
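// An illustrative sketch, separate from the patched code: at the source level
// a matching constraint ("0") ties an input to output operand 0; the code
// above expresses the same tie at the MIR level via getFlagWordForMatchingOp
// and tieOperands. Assumes an x86-64 target and GNU inline-asm syntax; names
// are hypothetical.
int increment_with_tied_operand(int In) {
  int Out;
  // The "0" input must be allocated to the same register as output %0.
  asm("incl %0" : "=r"(Out) : "0"(In));
  return Out;  // In + 1
}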
if (OpInfo.Regs.empty()) { LLVM_DEBUG( dbgs() << "Couldn't allocate input register for register constraint\n"); return false; } unsigned NumRegs = OpInfo.Regs.size(); ArrayRef SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert(NumRegs == SourceRegs.size() && "Expected the number of input registers to match the number of " "source registers"); if (NumRegs > 1) { LLVM_DEBUG(dbgs() << "Input operands with multiple input registers are " "not supported yet\n"); return false; } unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); Inst.addImm(Flag); if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) return false; Inst.addReg(OpInfo.Regs[0]); break; } case InlineAsm::isClobber: { unsigned NumRegs = OpInfo.Regs.size(); if (NumRegs > 0) { unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs); Inst.addImm(Flag); for (Register Reg : OpInfo.Regs) { Inst.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(Reg.isPhysical())); } } break; } } } if (const MDNode *SrcLoc = Call.getMetadata("srcloc")) Inst.addMetadata(SrcLoc); // All inputs are handled, insert the instruction now MIRBuilder.insertInstr(Inst); // Finally, copy the output operands into the output registers ArrayRef ResRegs = GetOrCreateVRegs(Call); if (ResRegs.size() != OutputOperands.size()) { LLVM_DEBUG(dbgs() << "Expected the number of output registers to match the " "number of destination registers\n"); return false; } for (unsigned int i = 0, e = ResRegs.size(); i < e; i++) { GISelAsmOperandInfo &OpInfo = OutputOperands[i]; if (OpInfo.Regs.empty()) continue; switch (OpInfo.ConstraintType) { case TargetLowering::C_Register: case TargetLowering::C_RegisterClass: { if (OpInfo.Regs.size() > 1) { LLVM_DEBUG(dbgs() << "Output operands with multiple defining " "registers are not supported yet\n"); return false; } Register SrcReg = OpInfo.Regs[0]; unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI); if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) { // First copy the non-typed virtual register into a generic virtual // register Register Tmp1Reg = MRI->createGenericVirtualRegister(LLT::scalar(SrcSize)); MIRBuilder.buildCopy(Tmp1Reg, SrcReg); // Need to truncate the result of the register MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg); } else { MIRBuilder.buildCopy(ResRegs[i], SrcReg); } break; } case TargetLowering::C_Immediate: case TargetLowering::C_Other: LLVM_DEBUG( dbgs() << "Cannot lower target specific output constraints yet\n"); return false; case TargetLowering::C_Memory: break; // Already handled. case TargetLowering::C_Unknown: LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n"); return false; } } return true; } bool InlineAsmLowering::lowerAsmOperandForConstraint( Value *Val, StringRef Constraint, std::vector &Ops, MachineIRBuilder &MIRBuilder) const { if (Constraint.size() > 1) return false; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: return false; case 'i': // Simple Integer or Relocatable Constant if (ConstantInt *CI = dyn_cast(Val)) { assert(CI->getBitWidth() <= 64 && "expected immediate to fit into 64-bits"); // Boolean constants should be zero-extended, others are sign-extended bool IsBool = CI->getBitWidth() == 1; int64_t ExtVal = IsBool ? 
CI->getZExtValue() : CI->getSExtValue(); Ops.push_back(MachineOperand::CreateImm(ExtVal)); return true; } return false; } } diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 7c39ddc8b1da..7ed8a718ed3c 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1,1967 +1,1967 @@ //===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Implementation of ELF support for the MC-JIT runtime dynamic linker. // //===----------------------------------------------------------------------===// #include "RuntimeDyldELF.h" #include "RuntimeDyldCheckerImpl.h" #include "Targets/RuntimeDyldELFMips.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; #define DEBUG_TYPE "dyld" static void or32le(void *P, int32_t V) { write32le(P, read32le(P) | V); } static void or32AArch64Imm(void *L, uint64_t Imm) { or32le(L, (Imm & 0xFFF) << 10); } template static void write(bool isBE, void *P, T V) { isBE ? write(P, V) : write(P, V); } static void write32AArch64Addr(void *L, uint64_t Imm) { uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3); write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); } // Return the bits [Start, End] from Val shifted Start bits. // For instance, getBits(0xF0, 4, 8) returns 0xF. static uint64_t getBits(uint64_t Val, int Start, int End) { uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1; return (Val >> Start) & Mask; } namespace { template class DyldELFObject : public ELFObjectFile { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) typedef Elf_Shdr_Impl Elf_Shdr; typedef Elf_Sym_Impl Elf_Sym; typedef Elf_Rel_Impl Elf_Rel; typedef Elf_Rel_Impl Elf_Rela; typedef Elf_Ehdr_Impl Elf_Ehdr; typedef typename ELFT::uint addr_type; DyldELFObject(ELFObjectFile &&Obj); public: static Expected> create(MemoryBufferRef Wrapper); void updateSectionAddress(const SectionRef &Sec, uint64_t Addr); void updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr); // Methods for type inquiry through isa, cast and dyn_cast static bool classof(const Binary *v) { return (isa>(v) && classof(cast>(v))); } static bool classof(const ELFObjectFile *v) { return v->isDyldType(); } }; // The MemoryBuffer passed into this constructor is just a wrapper around the // actual memory. Ultimately, the Binary parent class will take ownership of // this MemoryBuffer object but not the underlying memory. 
template DyldELFObject::DyldELFObject(ELFObjectFile &&Obj) : ELFObjectFile(std::move(Obj)) { this->isDyldELFObject = true; } template Expected>> DyldELFObject::create(MemoryBufferRef Wrapper) { auto Obj = ELFObjectFile::create(Wrapper); if (auto E = Obj.takeError()) return std::move(E); std::unique_ptr> Ret( new DyldELFObject(std::move(*Obj))); return std::move(Ret); } template void DyldELFObject::updateSectionAddress(const SectionRef &Sec, uint64_t Addr) { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast(reinterpret_cast(ShdrRef.p)); // This assumes the address passed in matches the target address bitness // The template-based type cast handles everything else. shdr->sh_addr = static_cast(Addr); } template void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr) { Elf_Sym *sym = const_cast( ELFObjectFile::getSymbol(SymRef.getRawDataRefImpl())); // This assumes the address passed in matches the target address bitness // The template-based type cast handles everything else. sym->st_value = static_cast(Addr); } class LoadedELFObjectInfo final : public LoadedObjectInfoHelper { public: LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary getObjectForDebug(const ObjectFile &Obj) const override; }; template static Expected>> createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject, const LoadedELFObjectInfo &L) { typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::uint addr_type; Expected>> ObjOrErr = DyldELFObject::create(Buffer); if (Error E = ObjOrErr.takeError()) return std::move(E); std::unique_ptr> Obj = std::move(*ObjOrErr); // Iterate over all sections in the object. auto SI = SourceObject.section_begin(); for (const auto &Sec : Obj->sections()) { Expected NameOrErr = Sec.getName(); if (!NameOrErr) { consumeError(NameOrErr.takeError()); continue; } if (*NameOrErr != "") { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); if (uint64_t SecLoadAddr = L.getSectionLoadAddress(*SI)) { // This assumes that the address passed in matches the target address // bitness. The template-based type cast handles everything else. 
shdr->sh_addr = static_cast(SecLoadAddr); } } ++SI; } return std::move(Obj); } static OwningBinary createELFDebugObject(const ObjectFile &Obj, const LoadedELFObjectInfo &L) { assert(Obj.isELF() && "Not an ELF object file."); std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(Obj.getData(), Obj.getFileName()); Expected> DebugObj(nullptr); handleAllErrors(DebugObj.takeError()); if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L); else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L); else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L); else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L); else llvm_unreachable("Unexpected ELF format"); handleAllErrors(DebugObj.takeError()); return OwningBinary(std::move(*DebugObj), std::move(Buffer)); } OwningBinary LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const { return createELFDebugObject(Obj, *this); } } // anonymous namespace namespace llvm { RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver) : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {} RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } UnregisteredEHFrameSections.clear(); } std::unique_ptr llvm::RuntimeDyldELF::create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver) { switch (Arch) { default: return std::make_unique(MemMgr, Resolver); case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldELF::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) return std::make_unique(*this, *ObjSectionToIDOrErr); else { HasError = true; raw_string_ostream ErrStream(ErrorStr); logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream); return nullptr; } } void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend, uint64_t SymOffset) { switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_X86_64_NONE: break; case ELF::R_X86_64_64: { support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = Value + Addend; LLVM_DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_32: case ELF::R_X86_64_32S: { Value += Addend; assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) || (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = TruncatedAddr; LLVM_DEBUG(dbgs() << "Writing " << format("%p", 
TruncatedAddr) << " at " << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_PC8: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<8>(RealOffset)); int8_t TruncOffset = (RealOffset & 0xFF); Section.getAddress()[Offset] = TruncOffset; break; } case ELF::R_X86_64_PC32: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<32>(RealOffset)); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = TruncOffset; break; } case ELF::R_X86_64_PC64: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = RealOffset; LLVM_DEBUG(dbgs() << "Writing " << format("%p", RealOffset) << " at " << format("%p\n", FinalAddress)); break; } case ELF::R_X86_64_GOTOFF64: { // Compute Value - GOTBase. uint64_t GOTBase = 0; for (const auto &Section : Sections) { if (Section.getName() == ".got") { GOTBase = Section.getLoadAddressWithOffset(0); break; } } assert(GOTBase != 0 && "missing GOT"); int64_t GOTOffset = Value - GOTBase + Addend; support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = GOTOffset; break; } } } void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { switch (Type) { case ELF::R_386_32: { support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = Value + Addend; break; } // Handle R_386_PLT32 like R_386_PC32 since it should be able to // reach any 32 bit address. case ELF::R_386_PLT32: case ELF::R_386_PC32: { uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; uint32_t RealOffset = Value + Addend - FinalAddress; support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = RealOffset; break; } default: // There are other relocation types, but it appears these are the // only ones currently used by the LLVM ELF object writer - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; } } void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { uint32_t *TargetPtr = reinterpret_cast(Section.getAddressWithOffset(Offset)); uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); // Data should use target endian. Code should always use little endian. 
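// An illustrative sketch, separate from the patched code: the PC-relative
// cases above (R_X86_64_PC8/PC32/PC64) all compute S + A - P and then require
// the result to fit the relocated field. A standalone version of the 32-bit
// check, with hypothetical names:
#include <cstdint>
#include <limits>

bool pc32_fits(uint64_t Value, int64_t Addend, uint64_t FinalAddress,
               int32_t &Out) {
  int64_t RealOffset = static_cast<int64_t>(Value + Addend - FinalAddress);
  if (RealOffset < std::numeric_limits<int32_t>::min() ||
      RealOffset > std::numeric_limits<int32_t>::max())
    return false;                           // would overflow the 32-bit field
  Out = static_cast<int32_t>(RealOffset);   // stored little-endian at P
  return true;
}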
bool isBE = Arch == Triple::aarch64_be; LLVM_DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" << format("%llx", FinalAddress) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) << "\n"); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_AARCH64_ABS16: { uint64_t Result = Value + Addend; assert(static_cast(Result) >= INT16_MIN && Result < UINT16_MAX); write(isBE, TargetPtr, static_cast(Result & 0xffffU)); break; } case ELF::R_AARCH64_ABS32: { uint64_t Result = Value + Addend; assert(static_cast(Result) >= INT32_MIN && Result < UINT32_MAX); write(isBE, TargetPtr, static_cast(Result & 0xffffffffU)); break; } case ELF::R_AARCH64_ABS64: write(isBE, TargetPtr, Value + Addend); break; case ELF::R_AARCH64_PLT32: { uint64_t Result = Value + Addend - FinalAddress; assert(static_cast(Result) >= INT32_MIN && static_cast(Result) <= INT32_MAX); write(isBE, TargetPtr, static_cast(Result)); break; } case ELF::R_AARCH64_PREL32: { uint64_t Result = Value + Addend - FinalAddress; assert(static_cast(Result) >= INT32_MIN && static_cast(Result) <= UINT32_MAX); write(isBE, TargetPtr, static_cast(Result & 0xffffffffU)); break; } case ELF::R_AARCH64_PREL64: write(isBE, TargetPtr, Value + Addend - FinalAddress); break; case ELF::R_AARCH64_CALL26: // fallthrough case ELF::R_AARCH64_JUMP26: { // Operation: S+A-P. Set Call or B immediate value to bits fff_fffc of the // calculation. uint64_t BranchImm = Value + Addend - FinalAddress; // "Check that -2^27 <= result < 2^27". assert(isInt<28>(BranchImm)); or32le(TargetPtr, (BranchImm & 0x0FFFFFFC) >> 2); break; } case ELF::R_AARCH64_MOVW_UABS_G3: or32le(TargetPtr, ((Value + Addend) & 0xFFFF000000000000) >> 43); break; case ELF::R_AARCH64_MOVW_UABS_G2_NC: or32le(TargetPtr, ((Value + Addend) & 0xFFFF00000000) >> 27); break; case ELF::R_AARCH64_MOVW_UABS_G1_NC: or32le(TargetPtr, ((Value + Addend) & 0xFFFF0000) >> 11); break; case ELF::R_AARCH64_MOVW_UABS_G0_NC: or32le(TargetPtr, ((Value + Addend) & 0xFFFF) << 5); break; case ELF::R_AARCH64_ADR_PREL_PG_HI21: { // Operation: Page(S+A) - Page(P) uint64_t Result = ((Value + Addend) & ~0xfffULL) - (FinalAddress & ~0xfffULL); // Check that -2^32 <= X < 2^32 assert(isInt<33>(Result) && "overflow check failed for relocation"); // Immediate goes in bits 30:29 + 5:23 of ADRP instruction, taken // from bits 32:12 of X. 
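// An illustrative sketch, separate from the patched code:
// R_AARCH64_ADR_PREL_PG_HI21 computes Page(S+A) - Page(P) and places bits
// 32:12 of that delta into the ADRP immediate fields, as the
// write32AArch64Addr helper above does (immlo in bits 30:29, immhi in bits
// 23:5). Names are hypothetical; endianness handling is omitted.
#include <cstdint>

uint32_t encode_adrp_imm(uint32_t Insn, uint64_t SPlusA, uint64_t P) {
  uint64_t Delta = (SPlusA & ~0xfffULL) - (P & ~0xfffULL);        // page delta
  uint64_t Imm = Delta >> 12;                       // bits 32:12 of the delta
  uint32_t ImmLo = static_cast<uint32_t>(Imm & 0x3) << 29;        // bits 30:29
  uint32_t ImmHi = static_cast<uint32_t>(Imm & 0x1FFFFC) << 3;    // bits 23:5
  uint32_t Mask = (0x3u << 29) | (0x1FFFFCu << 3);
  return (Insn & ~Mask) | ImmLo | ImmHi;
}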
write32AArch64Addr(TargetPtr, Result >> 12); break; } case ELF::R_AARCH64_ADD_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:0 of X or32AArch64Imm(TargetPtr, Value + Addend); break; case ELF::R_AARCH64_LDST8_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:0 of X or32AArch64Imm(TargetPtr, getBits(Value + Addend, 0, 11)); break; case ELF::R_AARCH64_LDST16_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:1 of X or32AArch64Imm(TargetPtr, getBits(Value + Addend, 1, 11)); break; case ELF::R_AARCH64_LDST32_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:2 of X or32AArch64Imm(TargetPtr, getBits(Value + Addend, 2, 11)); break; case ELF::R_AARCH64_LDST64_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:3 of X or32AArch64Imm(TargetPtr, getBits(Value + Addend, 3, 11)); break; case ELF::R_AARCH64_LDST128_ABS_LO12_NC: // Operation: S + A // Immediate goes in bits 21:10 of LD/ST instruction, taken // from bits 11:4 of X or32AArch64Imm(TargetPtr, getBits(Value + Addend, 4, 11)); break; } } void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { // TODO: Add Thumb relocations. uint32_t *TargetPtr = reinterpret_cast(Section.getAddressWithOffset(Offset)); uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; Value += Addend; LLVM_DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << Section.getAddressWithOffset(Offset) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); switch (Type) { default: llvm_unreachable("Not implemented relocation type!"); case ELF::R_ARM_NONE: break; // Write a 31bit signed offset case ELF::R_ARM_PREL31: support::ulittle32_t::ref{TargetPtr} = (support::ulittle32_t::ref{TargetPtr} & 0x80000000) | ((Value - FinalAddress) & ~0x80000000); break; case ELF::R_ARM_TARGET1: case ELF::R_ARM_ABS32: support::ulittle32_t::ref{TargetPtr} = Value; break; // Write first 16 bit of 32 bit value to the mov instruction. // Last 4 bit should be shifted. case ELF::R_ARM_MOVW_ABS_NC: case ELF::R_ARM_MOVT_ABS: if (Type == ELF::R_ARM_MOVW_ABS_NC) Value = Value & 0xFFFF; else if (Type == ELF::R_ARM_MOVT_ABS) Value = (Value >> 16) & 0xFFFF; support::ulittle32_t::ref{TargetPtr} = (support::ulittle32_t::ref{TargetPtr} & ~0x000F0FFF) | (Value & 0xFFF) | (((Value >> 12) & 0xF) << 16); break; // Write 24 bit relative value to the branch instruction. case ELF::R_ARM_PC24: // Fall through. case ELF::R_ARM_CALL: // Fall through. 
case ELF::R_ARM_JUMP24: int32_t RelValue = static_cast(Value - FinalAddress - 8); RelValue = (RelValue & 0x03FFFFFC) >> 2; assert((support::ulittle32_t::ref{TargetPtr} & 0xFFFFFF) == 0xFFFFFE); support::ulittle32_t::ref{TargetPtr} = (support::ulittle32_t::ref{TargetPtr} & 0xFF000000) | RelValue; break; } } void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { if (Arch == Triple::UnknownArch || !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { IsMipsO32ABI = false; IsMipsN32ABI = false; IsMipsN64ABI = false; return; } if (auto *E = dyn_cast(&Obj)) { unsigned AbiVariant = E->getPlatformFlags(); IsMipsO32ABI = AbiVariant & ELF::EF_MIPS_ABI_O32; IsMipsN32ABI = AbiVariant & ELF::EF_MIPS_ABI2; } IsMipsN64ABI = Obj.getFileFormatName().equals("elf64-mips"); } // Return the .TOC. section and offset. Error RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj, ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel) { // Set a default SectionID in case we do not find a TOC section below. // This may happen for references to TOC base base (sym@toc, .odp // relocation) without a .toc directive. In this case just use the // first section (which is usually the .odp) since the code won't // reference the .toc base directly. Rel.SymbolName = nullptr; Rel.SectionID = 0; // The TOC consists of sections .got, .toc, .tocbss, .plt in that // order. The TOC starts where the first of these sections starts. for (auto &Section : Obj.sections()) { Expected NameOrErr = Section.getName(); if (!NameOrErr) return NameOrErr.takeError(); StringRef SectionName = *NameOrErr; if (SectionName == ".got" || SectionName == ".toc" || SectionName == ".tocbss" || SectionName == ".plt") { if (auto SectionIDOrErr = findOrEmitSection(Obj, Section, false, LocalSections)) Rel.SectionID = *SectionIDOrErr; else return SectionIDOrErr.takeError(); break; } } // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 // thus permitting a full 64 Kbytes segment. Rel.Addend = 0x8000; return Error::success(); } // Returns the sections and offset associated with the ODP entry referenced // by Symbol. 
Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel) { // Get the ELF symbol value (st_value) to compare with Relocation offset in // .opd entries for (section_iterator si = Obj.section_begin(), se = Obj.section_end(); si != se; ++si) { Expected RelSecOrErr = si->getRelocatedSection(); if (!RelSecOrErr) report_fatal_error(toString(RelSecOrErr.takeError())); section_iterator RelSecI = *RelSecOrErr; if (RelSecI == Obj.section_end()) continue; Expected NameOrErr = RelSecI->getName(); if (!NameOrErr) return NameOrErr.takeError(); StringRef RelSectionName = *NameOrErr; if (RelSectionName != ".opd") continue; for (elf_relocation_iterator i = si->relocation_begin(), e = si->relocation_end(); i != e;) { // The R_PPC64_ADDR64 relocation indicates the first field // of a .opd entry uint64_t TypeFunc = i->getType(); if (TypeFunc != ELF::R_PPC64_ADDR64) { ++i; continue; } uint64_t TargetSymbolOffset = i->getOffset(); symbol_iterator TargetSymbol = i->getSymbol(); int64_t Addend; if (auto AddendOrErr = i->getAddend()) Addend = *AddendOrErr; else return AddendOrErr.takeError(); ++i; if (i == e) break; // Just check if following relocation is a R_PPC64_TOC uint64_t TypeTOC = i->getType(); if (TypeTOC != ELF::R_PPC64_TOC) continue; // Finally compares the Symbol value and the target symbol offset // to check if this .opd entry refers to the symbol the relocation // points to. if (Rel.Addend != (int64_t)TargetSymbolOffset) continue; section_iterator TSI = Obj.section_end(); if (auto TSIOrErr = TargetSymbol->getSection()) TSI = *TSIOrErr; else return TSIOrErr.takeError(); assert(TSI != Obj.section_end() && "TSI should refer to a valid section"); bool IsCode = TSI->isText(); if (auto SectionIDOrErr = findOrEmitSection(Obj, *TSI, IsCode, LocalSections)) Rel.SectionID = *SectionIDOrErr; else return SectionIDOrErr.takeError(); Rel.Addend = (intptr_t)Addend; return Error::success(); } } llvm_unreachable("Attempting to get address of ODP entry!"); } // Relocation masks following the #lo(value), #hi(value), #ha(value), // #higher(value), #highera(value), #highest(value), and #highesta(value) // macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi // document. 
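// An illustrative sketch, separate from the patched code: the #lo/#ha helpers
// defined just below split a value so that (ha << 16) plus the sign-extended
// lo half reconstructs it, which is why paired ..._HA/..._LO relocations (for
// an addis/addi or addis/load pair) resolve correctly. Names are hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Value = 0x12348000;
  uint16_t Lo = Value & 0xffff;                    // applyPPClo
  uint16_t Ha = ((Value + 0x8000) >> 16) & 0xffff; // applyPPCha
  // The +0x8000 bias in #ha compensates for the sign extension of #lo.
  assert(static_cast<uint32_t>((static_cast<uint32_t>(Ha) << 16) +
                               static_cast<int16_t>(Lo)) == Value);
  return 0;
}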
static inline uint16_t applyPPClo(uint64_t value) { return value & 0xffff; } static inline uint16_t applyPPChi(uint64_t value) { return (value >> 16) & 0xffff; } static inline uint16_t applyPPCha (uint64_t value) { return ((value + 0x8000) >> 16) & 0xffff; } static inline uint16_t applyPPChigher(uint64_t value) { return (value >> 32) & 0xffff; } static inline uint16_t applyPPChighera (uint64_t value) { return ((value + 0x8000) >> 32) & 0xffff; } static inline uint16_t applyPPChighest(uint64_t value) { return (value >> 48) & 0xffff; } static inline uint16_t applyPPChighesta (uint64_t value) { return ((value + 0x8000) >> 48) & 0xffff; } void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); break; case ELF::R_PPC_ADDR16_HI: writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); break; case ELF::R_PPC_ADDR16_HA: writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); break; } } void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC64_ADDR16: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); break; case ELF::R_PPC64_ADDR16_DS: writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); break; case ELF::R_PPC64_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); break; case ELF::R_PPC64_ADDR16_LO_DS: writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); break; case ELF::R_PPC64_ADDR16_HI: case ELF::R_PPC64_ADDR16_HIGH: writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); break; case ELF::R_PPC64_ADDR16_HA: case ELF::R_PPC64_ADDR16_HIGHA: writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); break; case ELF::R_PPC64_ADDR16_HIGHER: writeInt16BE(LocalAddress, applyPPChigher(Value + Addend)); break; case ELF::R_PPC64_ADDR16_HIGHERA: writeInt16BE(LocalAddress, applyPPChighera(Value + Addend)); break; case ELF::R_PPC64_ADDR16_HIGHEST: writeInt16BE(LocalAddress, applyPPChighest(Value + Addend)); break; case ELF::R_PPC64_ADDR16_HIGHESTA: writeInt16BE(LocalAddress, applyPPChighesta(Value + Addend)); break; case ELF::R_PPC64_ADDR14: { assert(((Value + Addend) & 3) == 0); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = *(LocalAddress + 3); writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; case ELF::R_PPC64_REL16_LO: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPClo(Delta)); } break; case ELF::R_PPC64_REL16_HI: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPChi(Delta)); } break; case ELF::R_PPC64_REL16_HA: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; case ELF::R_PPC64_ADDR32: { int64_t Result = 
static_cast(Value + Addend); if (SignExtend64<32>(Result) != Result) llvm_unreachable("Relocation R_PPC64_ADDR32 overflow"); writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t delta = static_cast(Value - FinalAddress + Addend); if (SignExtend64<26>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); // We preserve bits other than LI field, i.e. PO and AA/LK fields. uint32_t Inst = readBytesUnaligned(LocalAddress, 4); writeInt32BE(LocalAddress, (Inst & 0xFC000003) | (delta & 0x03FFFFFC)); } break; case ELF::R_PPC64_REL32: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t delta = static_cast(Value - FinalAddress + Addend); if (SignExtend64<32>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; case ELF::R_PPC64_REL64: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt64BE(LocalAddress, Delta); } break; case ELF::R_PPC64_ADDR64: writeInt64BE(LocalAddress, Value + Addend); break; } } void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_390_PC16DBL: case ELF::R_390_PLT16DBL: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow"); writeInt16BE(LocalAddress, Delta / 2); break; } case ELF::R_390_PC32DBL: case ELF::R_390_PLT32DBL: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow"); writeInt32BE(LocalAddress, Delta / 2); break; } case ELF::R_390_PC16: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int16_t(Delta) == Delta && "R_390_PC16 overflow"); writeInt16BE(LocalAddress, Delta); break; } case ELF::R_390_PC32: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); writeInt32BE(LocalAddress, Delta); break; } case ELF::R_390_PC64: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); writeInt64BE(LocalAddress, Delta); break; } case ELF::R_390_8: *LocalAddress = (uint8_t)(Value + Addend); break; case ELF::R_390_16: writeInt16BE(LocalAddress, Value + Addend); break; case ELF::R_390_32: writeInt32BE(LocalAddress, Value + Addend); break; case ELF::R_390_64: writeInt64BE(LocalAddress, Value + Addend); break; } } void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { bool isBE = Arch == Triple::bpfeb; switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_BPF_NONE: break; case ELF::R_BPF_64_64: { write(isBE, Section.getAddressWithOffset(Offset), Value + Addend); LLVM_DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_BPF_64_32: { Value += Addend; assert(Value <= UINT32_MAX); write(isBE, 
Section.getAddressWithOffset(Offset), static_cast(Value)); LLVM_DEBUG(dbgs() << "Writing " << format("%p", Value) << " at " << format("%p\n", Section.getAddressWithOffset(Offset))); break; } } } // The target location for the relocation is described by RE.SectionID and // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each // SectionEntry has three members describing its location. // SectionEntry::Address is the address at which the section has been loaded // into memory in the current (host) process. SectionEntry::LoadAddress is the // address that the section will have in the target process. // SectionEntry::ObjAddress is the address of the bits for this section in the // original emitted object image (also in the current address space). // // Relocations will be applied as if the section were loaded at // SectionEntry::LoadAddress, but they will be applied at an address based // on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to // Target memory contents if they are required for value calculations. // // The Value parameter here is the load address of the symbol for the // relocation to be applied. For relocations which refer to symbols in the // current object Value will be the LoadAddress of the section in which // the symbol resides (RE.Addend provides additional information about the // symbol location). For external symbols, Value will be the address of the // symbol in the target address space. void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, RE.SymOffset, RE.SectionID); } void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend, uint64_t SymOffset, SID SectionID) { switch (Arch) { case Triple::x86_64: resolveX86_64Relocation(Section, Offset, Value, Type, Addend, SymOffset); break; case Triple::x86: resolveX86Relocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; case Triple::aarch64: case Triple::aarch64_be: resolveAArch64Relocation(Section, Offset, Value, Type, Addend); break; case Triple::arm: // Fall through. case Triple::armeb: case Triple::thumb: case Triple::thumbeb: resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; case Triple::ppc: resolvePPC32Relocation(Section, Offset, Value, Type, Addend); break; case Triple::ppc64: // Fall through. 
case Triple::ppc64le: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); break; case Triple::systemz: resolveSystemZRelocation(Section, Offset, Value, Type, Addend); break; case Triple::bpfel: case Triple::bpfeb: resolveBPFRelocation(Section, Offset, Value, Type, Addend); break; default: llvm_unreachable("Unsupported CPU type!"); } } void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const { return (void *)(Sections[SectionID].getObjAddress() + Offset); } void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType, bool IsLocal) const { switch (RelType) { case ELF::R_MICROMIPS_GOT16: if (IsLocal) return ELF::R_MICROMIPS_LO16; break; case ELF::R_MICROMIPS_HI16: return ELF::R_MICROMIPS_LO16; case ELF::R_MIPS_GOT16: if (IsLocal) return ELF::R_MIPS_LO16; break; case ELF::R_MIPS_HI16: return ELF::R_MIPS_LO16; case ELF::R_MIPS_PCHI16: return ELF::R_MIPS_PCLO16; default: break; } return ELF::R_MIPS_NONE; } // Sometimes we don't need to create thunk for a branch. // This typically happens when branch target is located // in the same object file. In such case target is either // a weak symbol or symbol in a different executable section. // This function checks if branch target is located in the // same object file and if distance between source and target // fits R_AARCH64_CALL26 relocation. If both conditions are // met, it emits direct jump to the target and returns true. // Otherwise false is returned and thunk is created. bool RuntimeDyldELF::resolveAArch64ShortBranch( unsigned SectionID, relocation_iterator RelI, const RelocationValueRef &Value) { uint64_t Address; if (Value.SymbolName) { auto Loc = GlobalSymbolTable.find(Value.SymbolName); // Don't create direct branch for external symbols. if (Loc == GlobalSymbolTable.end()) return false; const auto &SymInfo = Loc->second; Address = uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( SymInfo.getOffset())); } else { Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); } uint64_t Offset = RelI->getOffset(); uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); // R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27 // If distance between source and target is out of range then we should // create thunk. if (!isInt<28>(Address + Value.Addend - SourceAddress)) return false; resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), Value.Addend); return true; } void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, const RelocationValueRef &Value, relocation_iterator RelI, StubMap &Stubs) { LLVM_DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation."); SectionEntry &Section = Sections[SectionID]; uint64_t Offset = RelI->getOffset(); unsigned RelType = RelI->getType(); // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { resolveRelocation(Section, Offset, (uint64_t)Section.getAddressWithOffset(i->second), RelType, 0); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) { // Create a new stub function. 
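// The AArch64 far-branch stub filled in by createStubFunction() is, roughly,
//   movz ip0, #:abs_g3:<target>     (stub offset 0)
//   movk ip0, #:abs_g2_nc:<target>  (stub offset 4)
//   movk ip0, #:abs_g1_nc:<target>  (stub offset 8)
//   movk ip0, #:abs_g0_nc:<target>  (stub offset 12)
//   br   ip0
// so each of the four MOVW_UABS_* relocations created below patches one 16-bit slice of
// the full 64-bit target address into the stub.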
LLVM_DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset())); RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); RelocationEntry REmovk_g2(SectionID, StubTargetAddr - Section.getAddress() + 4, ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); RelocationEntry REmovk_g1(SectionID, StubTargetAddr - Section.getAddress() + 8, ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); RelocationEntry REmovk_g0(SectionID, StubTargetAddr - Section.getAddress() + 12, ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REmovz_g3, Value.SymbolName); addRelocationForSymbol(REmovk_g2, Value.SymbolName); addRelocationForSymbol(REmovk_g1, Value.SymbolName); addRelocationForSymbol(REmovk_g0, Value.SymbolName); } else { addRelocationForSection(REmovz_g3, Value.SectionID); addRelocationForSection(REmovk_g2, Value.SectionID); addRelocationForSection(REmovk_g1, Value.SectionID); addRelocationForSection(REmovk_g0, Value.SectionID); } resolveRelocation(Section, Offset, reinterpret_cast(Section.getAddressWithOffset( Section.getStubOffset())), RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } } Expected RuntimeDyldELF::processRelocationRef( unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) { const auto &Obj = cast(O); uint64_t RelType = RelI->getType(); int64_t Addend = 0; if (Expected AddendOrErr = ELFRelocationRef(*RelI).getAddend()) Addend = *AddendOrErr; else consumeError(AddendOrErr.takeError()); elf_symbol_iterator Symbol = RelI->getSymbol(); // Obtain the symbol name which is referenced in the relocation StringRef TargetName; if (Symbol != Obj.symbol_end()) { if (auto TargetNameOrErr = Symbol->getName()) TargetName = *TargetNameOrErr; else return TargetNameOrErr.takeError(); } LLVM_DEBUG(dbgs() << "\t\tRelType: " << RelType << " Addend: " << Addend << " TargetName: " << TargetName << "\n"); RelocationValueRef Value; // First search for the symbol in the local symbol table SymbolRef::Type SymType = SymbolRef::ST_Unknown; // Search for the symbol in the global symbol table RTDyldSymbolTable::const_iterator gsi = GlobalSymbolTable.end(); if (Symbol != Obj.symbol_end()) { gsi = GlobalSymbolTable.find(TargetName.data()); Expected SymTypeOrErr = Symbol->getType(); if (!SymTypeOrErr) { std::string Buf; raw_string_ostream OS(Buf); logAllUnhandledErrors(SymTypeOrErr.takeError(), OS); OS.flush(); report_fatal_error(Buf); } SymType = *SymTypeOrErr; } if (gsi != GlobalSymbolTable.end()) { const auto &SymInfo = gsi->second; Value.SectionID = SymInfo.getSectionID(); Value.Offset = SymInfo.getOffset(); Value.Addend = SymInfo.getOffset() + Addend; } else { switch (SymType) { case SymbolRef::ST_Debug: { // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously // and can be changed by another developers. Maybe best way is add // a new symbol type ST_Section to SymbolRef and use it. 
auto SectionOrErr = Symbol->getSection(); if (!SectionOrErr) { std::string Buf; raw_string_ostream OS(Buf); logAllUnhandledErrors(SectionOrErr.takeError(), OS); OS.flush(); report_fatal_error(Buf); } section_iterator si = *SectionOrErr; if (si == Obj.section_end()) llvm_unreachable("Symbol section not found, bad object file format!"); LLVM_DEBUG(dbgs() << "\t\tThis is section symbol\n"); bool isCode = si->isText(); if (auto SectionIDOrErr = findOrEmitSection(Obj, (*si), isCode, ObjSectionToID)) Value.SectionID = *SectionIDOrErr; else return SectionIDOrErr.takeError(); Value.Addend = Addend; break; } case SymbolRef::ST_Data: case SymbolRef::ST_Function: case SymbolRef::ST_Unknown: { Value.SymbolName = TargetName.data(); Value.Addend = Addend; // Absolute relocations will have a zero symbol ID (STN_UNDEF), which // will manifest here as a NULL symbol name. // We can set this as a valid (but empty) symbol name, and rely // on addRelocationForSymbol to handle this. if (!Value.SymbolName) Value.SymbolName = ""; break; } default: llvm_unreachable("Unresolved symbol type!"); break; } } uint64_t Offset = RelI->getOffset(); LLVM_DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset << "\n"); if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be)) { if (RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26) { resolveAArch64Branch(SectionID, Value, RelI, Stubs); } else if (RelType == ELF::R_AARCH64_ADR_GOT_PAGE) { // Craete new GOT entry or find existing one. If GOT entry is // to be created, then we also emit ABS64 relocation for it. uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64); resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ELF::R_AARCH64_ADR_PREL_PG_HI21); } else if (RelType == ELF::R_AARCH64_LD64_GOT_LO12_NC) { uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64); resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ELF::R_AARCH64_LDST64_ABS_LO12_NC); } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (Arch == Triple::arm) { if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || RelType == ELF::R_ARM_JUMP24) { // This is an ARM branch relocation, need to use a stub function. LLVM_DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.\n"); SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { resolveRelocation( Section, Offset, reinterpret_cast(Section.getAddressWithOffset(i->second)), RelType, 0); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
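// The ARM far-branch stub emitted by createStubFunction() is essentially
//   ldr pc, [pc, #-4]
//   .word <target address>
// and the returned StubTargetAddr points at the literal word, so a single R_ARM_ABS32
// relocation at that offset (created below) supplies the branch target.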
LLVM_DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset())); RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); resolveRelocation(Section, Offset, reinterpret_cast( Section.getAddressWithOffset( Section.getStubOffset())), RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } } else { uint32_t *Placeholder = reinterpret_cast(computePlaceholderAddress(SectionID, Offset)); if (RelType == ELF::R_ARM_PREL31 || RelType == ELF::R_ARM_TARGET1 || RelType == ELF::R_ARM_ABS32) { Value.Addend += *Placeholder; } else if (RelType == ELF::R_ARM_MOVW_ABS_NC || RelType == ELF::R_ARM_MOVT_ABS) { // See ELF for ARM documentation Value.Addend += (int16_t)((*Placeholder & 0xFFF) | (((*Placeholder >> 16) & 0xF) << 12)); } processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (IsMipsO32ABI) { uint8_t *Placeholder = reinterpret_cast( computePlaceholderAddress(SectionID, Offset)); uint32_t Opcode = readBytesUnaligned(Placeholder, 4); if (RelType == ELF::R_MIPS_26) { // This is an Mips branch relocation, need to use a stub function. LLVM_DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); SectionEntry &Section = Sections[SectionID]; // Extract the addend from the instruction. // We shift up by two since the Value will be down shifted again // when applying the relocation. uint32_t Addend = (Opcode & 0x03ffffff) << 2; Value.Addend += Addend; // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { RelocationEntry RE(SectionID, Offset, RelType, i->second); addRelocationForSection(RE, SectionID); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. LLVM_DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); unsigned AbiVariant = Obj.getPlatformFlags(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset()), AbiVariant); // Creating Hi and Lo relocations for the filled stub instructions. 
RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_MIPS_HI16, Value.Addend); RelocationEntry RELo(SectionID, StubTargetAddr - Section.getAddress() + 4, ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); addRelocationForSymbol(RELo, Value.SymbolName); } else { addRelocationForSection(REHi, Value.SectionID); addRelocationForSection(RELo, Value.SectionID); } RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); addRelocationForSection(RE, SectionID); Section.advanceStubOffset(getMaxStubSize()); } } else if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) { int64_t Addend = (Opcode & 0x0000ffff) << 16; RelocationEntry RE(SectionID, Offset, RelType, Addend); PendingRelocs.push_back(std::make_pair(Value, RE)); } else if (RelType == ELF::R_MIPS_LO16 || RelType == ELF::R_MIPS_PCLO16) { int64_t Addend = Value.Addend + SignExtend32<16>(Opcode & 0x0000ffff); for (auto I = PendingRelocs.begin(); I != PendingRelocs.end();) { const RelocationValueRef &MatchingValue = I->first; RelocationEntry &Reloc = I->second; if (MatchingValue == Value && RelType == getMatchingLoRelocation(Reloc.RelType) && SectionID == Reloc.SectionID) { Reloc.Addend += Addend; if (Value.SymbolName) addRelocationForSymbol(Reloc, Value.SymbolName); else addRelocationForSection(Reloc, Value.SectionID); I = PendingRelocs.erase(I); } else ++I; } RelocationEntry RE(SectionID, Offset, RelType, Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } else { if (RelType == ELF::R_MIPS_32) Value.Addend += Opcode; else if (RelType == ELF::R_MIPS_PC16) Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2); else if (RelType == ELF::R_MIPS_PC19_S2) Value.Addend += SignExtend32<21>((Opcode & 0x0007ffff) << 2); else if (RelType == ELF::R_MIPS_PC21_S2) Value.Addend += SignExtend32<23>((Opcode & 0x001fffff) << 2); else if (RelType == ELF::R_MIPS_PC26_S2) Value.Addend += SignExtend32<28>((Opcode & 0x03ffffff) << 2); processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (IsMipsN32ABI || IsMipsN64ABI) { uint32_t r_type = RelType & 0xff; RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (r_type == ELF::R_MIPS_CALL16 || r_type == ELF::R_MIPS_GOT_PAGE || r_type == ELF::R_MIPS_GOT_DISP) { StringMap::iterator i = GOTSymbolOffsets.find(TargetName); if (i != GOTSymbolOffsets.end()) RE.SymOffset = i->second; else { RE.SymOffset = allocateGOTEntries(1); GOTSymbolOffsets[TargetName] = RE.SymOffset; } if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } else if (RelType == ELF::R_MIPS_26) { // This is an Mips branch relocation, need to use a stub function. LLVM_DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); SectionEntry &Section = Sections[SectionID]; // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { RelocationEntry RE(SectionID, Offset, RelType, i->second); addRelocationForSection(RE, SectionID); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
LLVM_DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); unsigned AbiVariant = Obj.getPlatformFlags(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset()), AbiVariant); if (IsMipsN32ABI) { // Creating Hi and Lo relocations for the filled stub instructions. RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_MIPS_HI16, Value.Addend); RelocationEntry RELo(SectionID, StubTargetAddr - Section.getAddress() + 4, ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); addRelocationForSymbol(RELo, Value.SymbolName); } else { addRelocationForSection(REHi, Value.SectionID); addRelocationForSection(RELo, Value.SectionID); } } else { // Creating Highest, Higher, Hi and Lo relocations for the filled stub // instructions. RelocationEntry REHighest(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_MIPS_HIGHEST, Value.Addend); RelocationEntry REHigher(SectionID, StubTargetAddr - Section.getAddress() + 4, ELF::R_MIPS_HIGHER, Value.Addend); RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress() + 12, ELF::R_MIPS_HI16, Value.Addend); RelocationEntry RELo(SectionID, StubTargetAddr - Section.getAddress() + 20, ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHighest, Value.SymbolName); addRelocationForSymbol(REHigher, Value.SymbolName); addRelocationForSymbol(REHi, Value.SymbolName); addRelocationForSymbol(RELo, Value.SymbolName); } else { addRelocationForSection(REHighest, Value.SectionID); addRelocationForSection(REHigher, Value.SectionID); addRelocationForSection(REHi, Value.SectionID); addRelocationForSection(RELo, Value.SectionID); } } RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); addRelocationForSection(RE, SectionID); Section.advanceStubOffset(getMaxStubSize()); } } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { if (RelType == ELF::R_PPC64_REL24) { // Determine ABI variant in use for this object. unsigned AbiVariant = Obj.getPlatformFlags(); AbiVariant &= ELF::EF_PPC64_ABI; // A PPC branch relocation will need a stub function if the target is // an external symbol (either Value.SymbolName is set, or SymType is // Symbol::ST_Unknown) or if the target address is not within the // signed 24-bits branch address. SectionEntry &Section = Sections[SectionID]; uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; bool IsExtern = Value.SymbolName || SymType == SymbolRef::ST_Unknown; if (!IsExtern) { if (AbiVariant != 2) { // In the ELFv1 ABI, a function call may point to the .opd entry, // so the final symbol value is calculated based on the relocation // values in the .opd section. if (auto Err = findOPDEntrySection(Obj, ObjSectionToID, Value)) return std::move(Err); } else { // In the ELFv2 ABI, a function symbol may provide a local entry // point, which must be used for direct calls. 
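// The distance between the two entry points is encoded in the three high bits of the
// symbol's st_other field (0, 4, 8, 16, ... bytes): the global entry point materializes
// the TOC pointer in r2, while the local entry point assumes r2 is already valid.
// decodePPC64LocalEntryOffset() turns that field into the byte offset that a direct
// local call must skip.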
if (Value.SectionID == SectionID){ uint8_t SymOther = Symbol->getOther(); Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); } } uint8_t *RelocTarget = Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int64_t delta = static_cast(Target - RelocTarget); // If it is within 26-bits branch range, just set the branch target if (SignExtend64<26>(delta) != delta) { RangeOverflow = true; } else if ((AbiVariant != 2) || (AbiVariant == 2 && Value.SectionID == SectionID)) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); addRelocationForSection(RE, Value.SectionID); } } if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID) || RangeOverflow) { // It is an external symbol (either Value.SymbolName is set, or // SymType is SymbolRef::ST_Unknown) or out of range. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it resolveRelocation(Section, Offset, reinterpret_cast( Section.getAddressWithOffset(i->second)), RelType, 0); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. LLVM_DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); uint8_t *StubTargetAddr = createStubFunction( Section.getAddressWithOffset(Section.getStubOffset()), AbiVariant); RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to // apply to the low part of the instructions, so we have to update // the offset according to the target endianness. uint64_t StubRelocOffset = StubTargetAddr - Section.getAddress(); if (!IsTargetLittleEndian) StubRelocOffset += 2; RelocationEntry REhst(SectionID, StubRelocOffset + 0, ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); RelocationEntry REhr(SectionID, StubRelocOffset + 4, ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); RelocationEntry REh(SectionID, StubRelocOffset + 12, ELF::R_PPC64_ADDR16_HI, Value.Addend); RelocationEntry REl(SectionID, StubRelocOffset + 16, ELF::R_PPC64_ADDR16_LO, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REhst, Value.SymbolName); addRelocationForSymbol(REhr, Value.SymbolName); addRelocationForSymbol(REh, Value.SymbolName); addRelocationForSymbol(REl, Value.SymbolName); } else { addRelocationForSection(REhst, Value.SectionID); addRelocationForSection(REhr, Value.SectionID); addRelocationForSection(REh, Value.SectionID); addRelocationForSection(REl, Value.SectionID); } resolveRelocation(Section, Offset, reinterpret_cast( Section.getAddressWithOffset( Section.getStubOffset())), RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) { // Restore the TOC for external calls if (AbiVariant == 2) writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1) else writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1) } } } else if (RelType == ELF::R_PPC64_TOC16 || RelType == ELF::R_PPC64_TOC16_DS || RelType == ELF::R_PPC64_TOC16_LO || RelType == ELF::R_PPC64_TOC16_LO_DS || RelType == ELF::R_PPC64_TOC16_HI || RelType == ELF::R_PPC64_TOC16_HA) { // These relocations are supposed to subtract the TOC address from // the final value. 
This does not fit cleanly into the RuntimeDyld // scheme, since there may be *two* sections involved in determining // the relocation value (the section of the symbol referred to by the // relocation, and the TOC section associated with the current module). // // Fortunately, these relocations are currently only ever generated // referring to symbols that themselves reside in the TOC, which means // that the two sections are actually the same. Thus they cancel out // and we can immediately resolve the relocation right now. switch (RelType) { case ELF::R_PPC64_TOC16: RelType = ELF::R_PPC64_ADDR16; break; case ELF::R_PPC64_TOC16_DS: RelType = ELF::R_PPC64_ADDR16_DS; break; case ELF::R_PPC64_TOC16_LO: RelType = ELF::R_PPC64_ADDR16_LO; break; case ELF::R_PPC64_TOC16_LO_DS: RelType = ELF::R_PPC64_ADDR16_LO_DS; break; case ELF::R_PPC64_TOC16_HI: RelType = ELF::R_PPC64_ADDR16_HI; break; case ELF::R_PPC64_TOC16_HA: RelType = ELF::R_PPC64_ADDR16_HA; break; default: llvm_unreachable("Wrong relocation type."); } RelocationValueRef TOCValue; if (auto Err = findPPC64TOCSection(Obj, ObjSectionToID, TOCValue)) return std::move(Err); if (Value.SymbolName || Value.SectionID != TOCValue.SectionID) llvm_unreachable("Unsupported TOC relocation."); Value.Addend -= TOCValue.Addend; resolveRelocation(Sections[SectionID], Offset, Value.Addend, RelType, 0); } else { // There are two ways to refer to the TOC address directly: either // via a ELF::R_PPC64_TOC relocation (where both symbol and addend are // ignored), or via any relocation that refers to the magic ".TOC." // symbols (in which case the addend is respected). if (RelType == ELF::R_PPC64_TOC) { RelType = ELF::R_PPC64_ADDR64; if (auto Err = findPPC64TOCSection(Obj, ObjSectionToID, Value)) return std::move(Err); } else if (TargetName == ".TOC.") { if (auto Err = findPPC64TOCSection(Obj, ObjSectionToID, Value)) return std::move(Err); Value.Addend += Addend; } RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } } else if (Arch == Triple::systemz && (RelType == ELF::R_390_PLT32DBL || RelType == ELF::R_390_GOTENT)) { // Create function stubs for both PLT and GOT references, regardless of // whether the GOT reference is to data or code. The stub contains the // full address of the symbol, as needed by GOT references, and the // executable part only adds an overhead of 8 bytes. // // We could try to conserve space by allocating the code and data // parts of the stub separately. However, as things stand, we allocate // a stub for every relocation, so using a GOT in JIT code should be // no less space efficient than using an explicit constant pool. LLVM_DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation."); SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { StubAddress = uintptr_t(Section.getAddressWithOffset(i->second)); LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
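// The SystemZ stub emitted by createStubFunction() is believed to be, roughly,
//   lgrl %r1, .+8    (load the 8-byte literal that follows the code)
//   br   %r1
//   .quad <target address>
// i.e. 8 bytes of code followed by an 8-byte literal. That is why the R_390_64
// relocation below is placed at StubOffset + 8, and why an R_390_GOTENT reference can
// resolve directly to StubAddress + 8.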
LLVM_DEBUG(dbgs() << " Create a new stub function\n"); uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); StubAddress = (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); RelocationEntry RE(SectionID, StubOffset + 8, ELF::R_390_64, Value.Offset); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); Section.advanceStubOffset(getMaxStubSize()); } if (RelType == ELF::R_390_GOTENT) resolveRelocation(Section, Offset, StubAddress + 8, ELF::R_390_PC32DBL, Addend); else resolveRelocation(Section, Offset, StubAddress, RelType, Addend); } else if (Arch == Triple::x86_64) { if (RelType == ELF::R_X86_64_PLT32) { // The way the PLT relocations normally work is that the linker allocates // the // PLT and this relocation makes a PC-relative call into the PLT. The PLT // entry will then jump to an address provided by the GOT. On first call, // the // GOT address will point back into PLT code that resolves the symbol. After // the first call, the GOT entry points to the actual function. // // For local functions we're ignoring all of that here and just replacing // the PLT32 relocation type with PC32, which will translate the relocation // into a PC-relative call directly to the function. For external symbols we // can't be sure the function will be within 2^32 bytes of the call site, so // we need to create a stub, which calls into the GOT. This case is // equivalent to the usual PLT implementation except that we use the stub // mechanism in RuntimeDyld (which puts stubs at the end of the section) // rather than allocating a PLT section. if (Value.SymbolName) { // This is a call to an external function. // Look for an existing stub. SectionEntry &Section = Sections[SectionID]; StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { StubAddress = uintptr_t(Section.getAddress()) + i->second; LLVM_DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function (equivalent to a PLT entry). LLVM_DEBUG(dbgs() << " Create a new stub function\n"); uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); StubAddress = (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); // Bump our stub offset counter Section.advanceStubOffset(getMaxStubSize()); // Allocate a GOT Entry uint64_t GOTOffset = allocateGOTEntries(1); // The load of the GOT address has an addend of -4 resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4, ELF::R_X86_64_PC32); // Fill in the value of the symbol we're targeting into the GOT addRelocationForSymbol( computeGOTOffsetRE(GOTOffset, 0, ELF::R_X86_64_64), Value.SymbolName); } // Make the target call a call into the stub table. 
resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32, Addend); } else { RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend, Value.Offset); addRelocationForSection(RE, Value.SectionID); } } else if (RelType == ELF::R_X86_64_GOTPCREL || RelType == ELF::R_X86_64_GOTPCRELX || RelType == ELF::R_X86_64_REX_GOTPCRELX) { uint64_t GOTOffset = allocateGOTEntries(1); resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ELF::R_X86_64_PC32); // Fill in the value of the symbol we're targeting into the GOT RelocationEntry RE = computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_X86_64_64); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } else if (RelType == ELF::R_X86_64_GOT64) { // Fill in a 64-bit GOT offset. uint64_t GOTOffset = allocateGOTEntries(1); resolveRelocation(Sections[SectionID], Offset, GOTOffset, ELF::R_X86_64_64, 0); // Fill in the value of the symbol we're targeting into the GOT RelocationEntry RE = computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_X86_64_64); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); } else if (RelType == ELF::R_X86_64_GOTPC64) { // Materialize the address of the base of the GOT relative to the PC. // This doesn't create a GOT entry, but it does mean we need a GOT // section. (void)allocateGOTEntries(0); resolveGOTOffsetRelocation(SectionID, Offset, Addend, ELF::R_X86_64_PC64); } else if (RelType == ELF::R_X86_64_GOTOFF64) { // GOTOFF relocations ultimately require a section difference relocation. (void)allocateGOTEntries(0); processSimpleRelocation(SectionID, Offset, RelType, Value); } else if (RelType == ELF::R_X86_64_PC32) { Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset)); processSimpleRelocation(SectionID, Offset, RelType, Value); } else if (RelType == ELF::R_X86_64_PC64) { Value.Addend += support::ulittle64_t::ref(computePlaceholderAddress(SectionID, Offset)); processSimpleRelocation(SectionID, Offset, RelType, Value); } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } } else { if (Arch == Triple::x86) { Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset)); } processSimpleRelocation(SectionID, Offset, RelType, Value); } return ++RelI; } size_t RuntimeDyldELF::getGOTEntrySize() { // We don't use the GOT in all of these cases, but it's essentially free // to put them all here. size_t Result = 0; switch (Arch) { case Triple::x86_64: case Triple::aarch64: case Triple::aarch64_be: case Triple::ppc64: case Triple::ppc64le: case Triple::systemz: Result = sizeof(uint64_t); break; case Triple::x86: case Triple::arm: case Triple::thumb: Result = sizeof(uint32_t); break; case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: if (IsMipsO32ABI || IsMipsN32ABI) Result = sizeof(uint32_t); else if (IsMipsN64ABI) Result = sizeof(uint64_t); else llvm_unreachable("Mips ABI not handled"); break; default: llvm_unreachable("Unsupported CPU type!"); } return Result; } uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned no) { if (GOTSectionID == 0) { GOTSectionID = Sections.size(); // Reserve a section id. 
We'll allocate the section later // once we know the total size Sections.push_back(SectionEntry(".got", nullptr, 0, 0, 0)); } uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize(); CurrentGOTIndex += no; return StartOffset; } uint64_t RuntimeDyldELF::findOrAllocGOTEntry(const RelocationValueRef &Value, unsigned GOTRelType) { auto E = GOTOffsetMap.insert({Value, 0}); if (E.second) { uint64_t GOTOffset = allocateGOTEntries(1); // Create relocation for newly created GOT entry RelocationEntry RE = computeGOTOffsetRE(GOTOffset, Value.Offset, GOTRelType); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); E.first->second = GOTOffset; } return E.first->second; } void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, uint64_t GOTOffset, uint32_t Type) { // Fill in the relative address of the GOT Entry into the stub RelocationEntry GOTRE(SectionID, Offset, Type, GOTOffset); addRelocationForSection(GOTRE, GOTSectionID); } RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(uint64_t GOTOffset, uint64_t SymbolOffset, uint32_t Type) { return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { if (IsMipsO32ABI) if (!PendingRelocs.empty()) return make_error("Can't find matching LO16 reloc"); // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), GOTSectionID, ".got", false); if (!Addr) return make_error("Unable to allocate memory for GOT!"); Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, TotalSize, 0); // For now, initialize all GOT entries to zero. We'll fill them in as // needed when GOT-based relocations are applied. memset(Addr, 0, TotalSize); if (IsMipsN32ABI || IsMipsN64ABI) { // To correctly resolve Mips GOT relocations, we need a mapping from // object's sections to GOTs. for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { if (SI->relocation_begin() != SI->relocation_end()) { Expected RelSecOrErr = SI->getRelocatedSection(); if (!RelSecOrErr) return make_error( toString(RelSecOrErr.takeError())); section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); assert (i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; } } GOTSymbolOffsets.clear(); } } // Look for and record the EH frame section. 
ObjSectionToIDMap::iterator i, e; for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { const SectionRef &Section = i->first; StringRef Name; Expected<StringRef> NameOrErr = Section.getName(); if (NameOrErr) Name = *NameOrErr; else consumeError(NameOrErr.takeError()); if (Name == ".eh_frame") { UnregisteredEHFrameSections.push_back(i->second); break; } } GOTSectionID = 0; CurrentGOTIndex = 0; return Error::success(); } bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { return Obj.isELF(); } bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { unsigned RelTy = R.getType(); if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; if (Arch == Triple::x86_64) return RelTy == ELF::R_X86_64_GOTPCREL || RelTy == ELF::R_X86_64_GOTPCRELX || RelTy == ELF::R_X86_64_GOT64 || RelTy == ELF::R_X86_64_REX_GOTPCRELX; return false; } bool RuntimeDyldELF::relocationNeedsStub(const RelocationRef &R) const { if (Arch != Triple::x86_64) return true; // Conservative answer switch (R.getType()) { default: return true; // Conservative answer case ELF::R_X86_64_GOTPCREL: case ELF::R_X86_64_GOTPCRELX: case ELF::R_X86_64_REX_GOTPCRELX: case ELF::R_X86_64_GOTPC64: case ELF::R_X86_64_GOT64: case ELF::R_X86_64_GOTOFF64: case ELF::R_X86_64_PC32: case ELF::R_X86_64_PC64: case ELF::R_X86_64_64: // We know that these relocation types won't need a stub function. This list // can be extended as needed. return false; } } } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index aa50bd05cb71..aaadc8dc1b60 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -1,126 +1,150 @@ //= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace { class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: AArch64WinCOFFObjectWriter() : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {} ~AArch64WinCOFFObjectWriter() override = default; unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; }; } // end anonymous namespace unsigned AArch64WinCOFFObjectWriter::getRelocType( MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { auto Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); const MCExpr *Expr = Fixup.getValue(); + if (const AArch64MCExpr *A64E = dyn_cast(Expr)) { + AArch64MCExpr::VariantKind RefKind = A64E->getKind(); + switch (AArch64MCExpr::getSymbolLoc(RefKind)) { + case AArch64MCExpr::VK_ABS: + case AArch64MCExpr::VK_SECREL: + // Supported + break; + default: + Ctx.reportError(Fixup.getLoc(), "relocation variant " + + A64E->getVariantKindName() + + " unsupported on COFF targets"); + return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value + } + } + switch (static_cast(Fixup.getKind())) { default: { - const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); - report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); + if (const AArch64MCExpr *A64E = dyn_cast(Expr)) { + Ctx.reportError(Fixup.getLoc(), "relocation type " + + A64E->getVariantKindName() + + " unsupported on COFF targets"); + } else { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + Ctx.reportError(Fixup.getLoc(), Twine("relocation type ") + Info.Name + + " unsupported on COFF targets"); + } + return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value } case FK_Data_4: switch (Modifier) { default: return COFF::IMAGE_REL_ARM64_ADDR32; case MCSymbolRefExpr::VK_COFF_IMGREL32: return COFF::IMAGE_REL_ARM64_ADDR32NB; case MCSymbolRefExpr::VK_SECREL: return COFF::IMAGE_REL_ARM64_SECREL; } case FK_Data_8: return COFF::IMAGE_REL_ARM64_ADDR64; case FK_SecRel_2: return COFF::IMAGE_REL_ARM64_SECTION; case FK_SecRel_4: return COFF::IMAGE_REL_ARM64_SECREL; case AArch64::fixup_aarch64_add_imm12: if (const AArch64MCExpr *A64E = dyn_cast(Expr)) { AArch64MCExpr::VariantKind RefKind = A64E->getKind(); if (RefKind == AArch64MCExpr::VK_SECREL_LO12) return COFF::IMAGE_REL_ARM64_SECREL_LOW12A; if (RefKind == AArch64MCExpr::VK_SECREL_HI12) return COFF::IMAGE_REL_ARM64_SECREL_HIGH12A; } return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A; case AArch64::fixup_aarch64_ldst_imm12_scale1: case AArch64::fixup_aarch64_ldst_imm12_scale2: case AArch64::fixup_aarch64_ldst_imm12_scale4: case AArch64::fixup_aarch64_ldst_imm12_scale8: case AArch64::fixup_aarch64_ldst_imm12_scale16: if (const AArch64MCExpr *A64E = 
dyn_cast<AArch64MCExpr>(Expr)) { AArch64MCExpr::VariantKind RefKind = A64E->getKind(); if (RefKind == AArch64MCExpr::VK_SECREL_LO12) return COFF::IMAGE_REL_ARM64_SECREL_LOW12L; } return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L; case AArch64::fixup_aarch64_pcrel_adr_imm21: return COFF::IMAGE_REL_ARM64_REL21; case AArch64::fixup_aarch64_pcrel_adrp_imm21: return COFF::IMAGE_REL_ARM64_PAGEBASE_REL21; case AArch64::fixup_aarch64_pcrel_branch14: return COFF::IMAGE_REL_ARM64_BRANCH14; case AArch64::fixup_aarch64_pcrel_branch19: return COFF::IMAGE_REL_ARM64_BRANCH19; case AArch64::fixup_aarch64_pcrel_branch26: case AArch64::fixup_aarch64_pcrel_call26: return COFF::IMAGE_REL_ARM64_BRANCH26; } } bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { return true; } namespace llvm { std::unique_ptr<MCObjectTargetWriter> createAArch64WinCOFFObjectWriter() { return std::make_unique<AArch64WinCOFFObjectWriter>(); } } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index b09e92c07f9b..45f515c5115e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -1,183 +1,193 @@ //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This pass adds amdgpu.uniform metadata to IR values so this information /// can be used during instruction selection.
// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "amdgpu-annotate-uniform" using namespace llvm; namespace { class AMDGPUAnnotateUniformValues : public FunctionPass, public InstVisitor { LegacyDivergenceAnalysis *DA; MemoryDependenceResults *MDR; LoopInfo *LI; DenseMap noClobberClones; bool isEntryFunc; public: static char ID; AMDGPUAnnotateUniformValues() : FunctionPass(ID) { } bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "AMDGPU Annotate Uniform Values"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); bool isClobberedInFunction(LoadInst * Load); }; } // End anonymous namespace INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) char AMDGPUAnnotateUniformValues::ID = 0; static void setUniformMetadata(Instruction *I) { I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); } static void setNoClobberMetadata(Instruction *I) { I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); } static void DFS(BasicBlock *Root, SetVector & Set) { for (auto I : predecessors(Root)) if (Set.insert(I)) DFS(I, Set); } bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { // 1. get Loop for the Load->getparent(); // 2. if it exists, collect all the BBs from the most outer // loop and check for the writes. If NOT - start DFS over all preds. // 3. Start DFS over all preds from the most outer loop header. SetVector Checklist; BasicBlock *Start = Load->getParent(); Checklist.insert(Start); const Value *Ptr = Load->getPointerOperand(); const Loop *L = LI->getLoopFor(Start); if (L) { const Loop *P = L; do { L = P; P = P->getParentLoop(); } while (P); Checklist.insert(L->block_begin(), L->block_end()); Start = L->getHeader(); } DFS(Start, Checklist); for (auto &BB : Checklist) { BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? BasicBlock::iterator(Load) : BB->end(); auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, StartIt, BB, Load); if (Q.isClobber() || Q.isUnknown()) return true; } return false; } void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (DA->isUniform(&I)) setUniformMetadata(I.getParent()->getTerminator()); } void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; auto isGlobalLoad = [&](LoadInst &Load)->bool { return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }; // We're tracking up to the Function boundaries, and cannot go beyond because // of FunctionPass restrictions. 
We can ensure that is memory not clobbered // for memory operations that are live in to entry points only. - bool NotClobbered = isEntryFunc && !isClobberedInFunction(&I); Instruction *PtrI = dyn_cast(Ptr); - if (!PtrI && NotClobbered && isGlobalLoad(I)) { - if (isa(Ptr) || isa(Ptr)) { + + if (!isEntryFunc) { + if (PtrI) + setUniformMetadata(PtrI); + return; + } + + bool NotClobbered = false; + if (PtrI) + NotClobbered = !isClobberedInFunction(&I); + else if (isa(Ptr) || isa(Ptr)) { + if (isGlobalLoad(I) && !isClobberedInFunction(&I)) { + NotClobbered = true; // Lookup for the existing GEP if (noClobberClones.count(Ptr)) { PtrI = noClobberClones[Ptr]; } else { // Create GEP of the Value Function *F = I.getParent()->getParent(); Value *Idx = Constant::getIntegerValue( Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); // Insert GEP at the entry to make it dominate all uses PtrI = GetElementPtrInst::Create( Ptr->getType()->getPointerElementType(), Ptr, ArrayRef(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); } I.replaceUsesOfWith(Ptr, PtrI); } } if (PtrI) { setUniformMetadata(PtrI); if (NotClobbered) setNoClobberMetadata(PtrI); } } bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { return false; } bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { if (skipFunction(F)) return false; DA = &getAnalysis(); MDR = &getAnalysis().getMemDep(); LI = &getAnalysis().getLoopInfo(); isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv()); visit(F); noClobberClones.clear(); return true; } FunctionPass * llvm::createAMDGPUAnnotateUniformValues() { return new AMDGPUAnnotateUniformValues(); } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 11c97210ead9..9a4c57fedac2 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1,4668 +1,4677 @@ //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the PowerPC implementation of the TargetInstrInfo class. 
// //===----------------------------------------------------------------------===// #include "PPCInstrInfo.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "ppc-instr-info" #define GET_INSTRMAP_INFO #define GET_INSTRINFO_CTOR_DTOR #include "PPCGenInstrInfo.inc" STATISTIC(NumStoreSPILLVSRRCAsVec, "Number of spillvsrrc spilled to stack as vec"); STATISTIC(NumStoreSPILLVSRRCAsGpr, "Number of spillvsrrc spilled to stack as gpr"); STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc"); STATISTIC(CmpIselsConverted, "Number of ISELs that depend on comparison of constants converted"); STATISTIC(MissedConvertibleImmediateInstrs, "Number of compare-immediate instructions fed by constants"); STATISTIC(NumRcRotatesConvertedToRcAnd, "Number of record-form rotates converted to record-form andi"); static cl:: opt DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); static cl::opt DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden); static cl::opt VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); static cl::opt UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation")); // Pin the vtable to this file. void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, /* CatchRetOpcode */ -1, STI.isPPC64() ? PPC::BLR8 : PPC::BLR), Subtarget(STI), RI(STI.getTargetMachine()) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. ScheduleHazardRecognizer * PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { unsigned Directive = static_cast(STI)->getCPUDirective(); if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = static_cast(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer /// to use for this target when scheduling the DAG. 
ScheduleHazardRecognizer * PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { unsigned Directive = DAG->MF.getSubtarget().getCPUDirective(); // FIXME: Leaving this as-is until we have POWER9 scheduling info if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) return new PPCDispatchGroupSBHazardRecognizer(II, DAG); // Most subtargets use a PPC970 recognizer. if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { assert(DAG->TII && "No InstrInfo?"); return new PPCHazardRecognizer970(*DAG); } return new ScoreboardHazardRecognizer(II, DAG); } unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost) const { if (!ItinData || UseOldLatencyCalc) return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); // The default implementation of getInstrLatency calls getStageLatency, but // getStageLatency does not do the right thing for us. While we have // itinerary, most cores are fully pipelined, and so the itineraries only // express the first part of the pipeline, not every stage. Instead, we need // to use the listed output operand cycle number (using operand 0 here, which // is an output). unsigned Latency = 1; unsigned DefClass = MI.getDesc().getSchedClass(); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) continue; int Cycle = ItinData->getOperandCycle(DefClass, i); if (Cycle < 0) continue; Latency = std::max(Latency, (unsigned) Cycle); } return Latency; } int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (!DefMI.getParent()) return Latency; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); Register Reg = DefMO.getReg(); bool IsRegCR; if (Register::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI.getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); } else { IsRegCR = PPC::CRRCRegClass.contains(Reg) || PPC::CRBITRCRegClass.contains(Reg); } if (UseMI.isBranch() && IsRegCR) { if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); // On some cores, there is an additional delay between writing to a condition // register, and using it from a branch. unsigned Directive = Subtarget.getCPUDirective(); switch (Directive) { default: break; case PPC::DIR_7400: case PPC::DIR_750: case PPC::DIR_970: case PPC::DIR_E5500: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: // FIXME: Is this needed for POWER9? Latency += 2; break; } } return Latency; } /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const { // Propagate FP flags from the original instructions. // But clear poison-generating flags because those may not be valid now. 
uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); NewMI1.setFlags(IntersectedFlags); NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); NewMI1.clearFlag(MachineInstr::MIFlag::IsExact); NewMI2.setFlags(IntersectedFlags); NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap); NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap); NewMI2.clearFlag(MachineInstr::MIFlag::IsExact); } void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const { MI.setFlags(Flags); MI.clearFlag(MachineInstr::MIFlag::NoSWrap); MI.clearFlag(MachineInstr::MIFlag::NoUWrap); MI.clearFlag(MachineInstr::MIFlag::IsExact); } // This function does not list all associative and commutative operations, but // only those worth feeding through the machine combiner in an attempt to // reduce the critical path. Mostly, this means floating-point operations, // because they have high latencies(>=5) (compared to other operations, such as // and/or, which are also associative and commutative, but have low latencies). bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { // Floating point: // FP Add: case PPC::FADD: case PPC::FADDS: // FP Multiply: case PPC::FMUL: case PPC::FMULS: // Altivec Add: case PPC::VADDFP: // VSX Add: case PPC::XSADDDP: case PPC::XVADDDP: case PPC::XVADDSP: case PPC::XSADDSP: // VSX Multiply: case PPC::XSMULDP: case PPC::XVMULDP: case PPC::XVMULSP: case PPC::XSMULSP: // QPX Add: case PPC::QVFADD: case PPC::QVFADDS: case PPC::QVFADDSs: // QPX Multiply: case PPC::QVFMUL: case PPC::QVFMULS: case PPC::QVFMULSs: return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && Inst.getFlag(MachineInstr::MIFlag::FmNsz); // Fixed point: // Multiply: case PPC::MULHD: case PPC::MULLD: case PPC::MULHW: case PPC::MULLW: return true; default: return false; } } #define InfoArrayIdxFMAInst 0 #define InfoArrayIdxFAddInst 1 #define InfoArrayIdxFMULInst 2 #define InfoArrayIdxAddOpIdx 3 #define InfoArrayIdxMULOpIdx 4 // Array keeps info for FMA instructions: // Index 0(InfoArrayIdxFMAInst): FMA instruction; // Index 1(InfoArrayIdxFAddInst): ADD instruction assoaicted with FMA; // Index 2(InfoArrayIdxFMULInst): MUL instruction assoaicted with FMA; // Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands; // Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands; // second MUL operand index is plus 1. static const uint16_t FMAOpIdxInfo[][5] = { // FIXME: Add more FMA instructions like XSNMADDADP and so on. {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2}, {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2}, {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2}, {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2}, {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1}, {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}, {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1}, {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}}; // Check if an opcode is a FMA instruction. If it is, return the index in array // FMAOpIdxInfo. Otherwise, return -1. 
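// As an illustration of how a matched row is consumed elsewhere in this file
// (comment-only sketch, not part of the patch): for PPC::FMADD the row is
// {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1}, so
//   AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]  // the addend is operand 3
//   MulOp1   = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]  // the multiplicands are
//   MulOp2   = MulOp1 + 1                               // operands 1 and 2
// getFMAPatterns() and reassociateFMA() below read the table in exactly this way.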
int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++) if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode) return I; return -1; } // Try to reassociate FMA chains like below: // // Pattern 1: // A = FADD X, Y (Leaf) // B = FMA A, M21, M22 (Prev) // C = FMA B, M31, M32 (Root) // --> // A = FMA X, M21, M22 // B = FMA Y, M31, M32 // C = FADD A, B // // Pattern 2: // A = FMA X, M11, M12 (Leaf) // B = FMA A, M21, M22 (Prev) // C = FMA B, M31, M32 (Root) // --> // A = FMUL M11, M12 // B = FMA X, M21, M22 // D = FMA A, M31, M32 // C = FADD B, D // // breaking the dependency between A and B, allowing FMA to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. bool PPCInstrInfo::getFMAPatterns( MachineInstr &Root, SmallVectorImpl &Patterns) const { MachineBasicBlock *MBB = Root.getParent(); const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) { for (const auto &MO : Instr.explicit_operands()) if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) return false; return true; }; auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx, bool IsLeaf, bool IsAdd) { int16_t Idx = -1; if (!IsAdd) { Idx = getFMAOpIdxInfo(Instr.getOpcode()); if (Idx < 0) return false; } else if (Instr.getOpcode() != FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())] [InfoArrayIdxFAddInst]) return false; // Instruction can be reassociated. // fast math flags may prohibit reassociation. if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && Instr.getFlag(MachineInstr::MIFlag::FmNsz))) return false; // Instruction operands are virtual registers for reassociation. if (!IsAllOpsVirtualReg(Instr)) return false; if (IsAdd && IsLeaf) return true; AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx); MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg()); // If 'add' operand's def is not in current block, don't do ILP related opt. if (!MIAdd || MIAdd->getParent() != MBB) return false; // If this is not Leaf FMA Instr, its 'add' operand should only have one use // as this fma will be changed later. return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg()); }; int16_t AddOpIdx = -1; // Root must be a valid FMA like instruction. if (!IsReassociable(Root, AddOpIdx, false, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegB = Root.getOperand(AddOpIdx).getReg(); MachineInstr *Prev = MRI.getUniqueVRegDef(RegB); // Prev must be a valid FMA like instruction. AddOpIdx = -1; if (!IsReassociable(*Prev, AddOpIdx, false, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegA = Prev->getOperand(AddOpIdx).getReg(); MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA); AddOpIdx = -1; if (IsReassociable(*Leaf, AddOpIdx, true, false)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); return true; } if (IsReassociable(*Leaf, AddOpIdx, true, true)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); return true; } return false; } bool PPCInstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl &Patterns) const { // Using the machine combiner in this way is potentially expensive, so // restrict to when aggressive optimizations are desired. 
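// A minimal standalone sketch (not part of the patch; assumes <cmath> for
// std::fma) of why "Pattern 1" above getFMAPatterns() is worth rewriting:
// both forms compute M21*M22 + M31*M32 + X + Y, but the rewritten form breaks
// the serial dependence between the two FMAs so they can issue in parallel.
// The two orderings may differ in the last bits, which is why the FmReassoc
// and FmNsz flags are required before the pattern is accepted.
static double fmaChainOriginal(double X, double Y, double M21, double M22,
                               double M31, double M32) {
  double A = X + Y;                 // A = FADD X, Y        (Leaf)
  double B = std::fma(M21, M22, A); // B = FMA  A, M21, M22 (Prev)
  return std::fma(M31, M32, B);     // C = FMA  B, M31, M32 (Root)
}
static double fmaChainReassociated(double X, double Y, double M21, double M22,
                                   double M31, double M32) {
  double A = std::fma(M21, M22, X); // A = FMA X, M21, M22
  double B = std::fma(M31, M32, Y); // B = FMA Y, M31, M32
  return A + B;                     // C = FADD A, B
}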
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; if (getFMAPatterns(Root, Patterns)) return true; return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); } void PPCInstrInfo::genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { switch (Pattern) { case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); break; default: // Reassociate default patterns. TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); break; } } // Currently, only handle two patterns REASSOC_XY_AMM_BMM and // REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns. void PPCInstrInfo::reassociateFMA( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &OpC = Root.getOperand(0); Register RegC = OpC.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(RegC); MRI.constrainRegClass(RegC, RC); unsigned FmaOp = Root.getOpcode(); int16_t Idx = getFMAOpIdxInfo(FmaOp); assert(Idx >= 0 && "Root must be a FMA instruction"); uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); MachineInstr *Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); uint16_t IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg, bool &KillFlag) { Reg = Operand.getReg(); MRI.constrainRegClass(Reg, RC); KillFlag = Operand.isKill(); }; auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1, Register &MulOp2, bool &MulOp1KillFlag, bool &MulOp2KillFlag) { GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag); GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag); }; Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32; bool KillX = false, KillY = false, KillM11 = false, KillM12 = false, KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false; GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32); GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22); if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12); GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { GetOperandInfo(Leaf->getOperand(1), RegX, KillX); GetOperandInfo(Leaf->getOperand(2), RegY, KillY); } // Create new virtual registers for the new results instead of // recycling legacy ones because the MachineCombiner's computation of the // critical path requires a new register definition rather than an existing // one. 
Register NewVRA = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0)); Register NewVRB = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); Register NewVRD = 0; if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { NewVRD = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2)); } auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd, Register RegMul1, bool KillRegMul1, Register RegMul2, bool KillRegMul2) { MI->getOperand(AddOpIdx).setReg(RegAdd); MI->getOperand(AddOpIdx).setIsKill(KillAdd); MI->getOperand(FirstMulOpIdx).setReg(RegMul1); MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1); MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2); MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2); }; if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { // Create new instructions for insertion. MachineInstrBuilder MINewB = BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) .addReg(RegX, getKillRegState(KillX)) .addReg(RegM21, getKillRegState(KillM21)) .addReg(RegM22, getKillRegState(KillM22)); MachineInstrBuilder MINewA = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) .addReg(RegY, getKillRegState(KillY)) .addReg(RegM31, getKillRegState(KillM31)) .addReg(RegM32, getKillRegState(KillM32)); // If AddOpIdx is not 1, adjust the order. if (AddOpIdx != 1) { AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32); } MachineInstrBuilder MINewC = BuildMI(*MF, Root.getDebugLoc(), get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC) .addReg(NewVRB, getKillRegState(true)) .addReg(NewVRA, getKillRegState(true)); // Update flags for newly created instructions. setSpecialOperandAttr(*MINewA, IntersectedFlags); setSpecialOperandAttr(*MINewB, IntersectedFlags); setSpecialOperandAttr(*MINewC, IntersectedFlags); // Record new instructions for insertion. InsInstrs.push_back(MINewA); InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewC); } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { assert(NewVRD && "new FMA register not created!"); // Create new instructions for insertion. MachineInstrBuilder MINewA = BuildMI(*MF, Leaf->getDebugLoc(), get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA) .addReg(RegM11, getKillRegState(KillM11)) .addReg(RegM12, getKillRegState(KillM12)); MachineInstrBuilder MINewB = BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) .addReg(RegX, getKillRegState(KillX)) .addReg(RegM21, getKillRegState(KillM21)) .addReg(RegM22, getKillRegState(KillM22)); MachineInstrBuilder MINewD = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD) .addReg(NewVRA, getKillRegState(true)) .addReg(RegM31, getKillRegState(KillM31)) .addReg(RegM32, getKillRegState(KillM32)); // If AddOpIdx is not 1, adjust the order. if (AddOpIdx != 1) { AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32, KillM32); } MachineInstrBuilder MINewC = BuildMI(*MF, Root.getDebugLoc(), get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC) .addReg(NewVRB, getKillRegState(true)) .addReg(NewVRD, getKillRegState(true)); // Update flags for newly created instructions. 
setSpecialOperandAttr(*MINewA, IntersectedFlags); setSpecialOperandAttr(*MINewB, IntersectedFlags); setSpecialOperandAttr(*MINewD, IntersectedFlags); setSpecialOperandAttr(*MINewC, IntersectedFlags); // Record new instructions for insertion. InsInstrs.push_back(MINewA); InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewD); InsInstrs.push_back(MINewC); } assert(!InsInstrs.empty() && "Insertion instructions set should not be empty!"); // Record old instructions for deletion. DelInstrs.push_back(Leaf); DelInstrs.push_back(Prev); DelInstrs.push_back(&Root); } // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const { switch (MI.getOpcode()) { default: return false; case PPC::EXTSW: case PPC::EXTSW_32: case PPC::EXTSW_32_64: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); SubIdx = PPC::sub_32; return true; } } unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opcode = MI.getOpcode(); const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); const unsigned *End = OpcodesForSpill + SOK_LastOpcodeSpill; if (End != std::find(OpcodesForSpill, End, Opcode)) { // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && MI.getOperand(2).isFI()) { FrameIndex = MI.getOperand(2).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } // For opcodes with the ReMaterializable flag set, this function is called to // verify the instruction is really rematable. bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable // flag set. llvm_unreachable("Unknown rematerializable operation!"); break; case PPC::LI: case PPC::LI8: case PPC::LIS: case PPC::LIS8: case PPC::QVGPCI: case PPC::ADDIStocHA: case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: case PPC::XXLEQVOnes: case PPC::V_SET0B: case PPC::V_SET0H: case PPC::V_SET0: case PPC::V_SETALLONESB: case PPC::V_SETALLONESH: case PPC::V_SETALLONES: case PPC::CRSET: case PPC::CRUNSET: return true; } return false; } unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opcode = MI.getOpcode(); const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); const unsigned *End = OpcodesForSpill + SOK_LastOpcodeSpill; if (End != std::find(OpcodesForSpill, End, Opcode)) { if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && MI.getOperand(2).isFI()) { FrameIndex = MI.getOperand(2).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { MachineFunction &MF = *MI.getParent()->getParent(); // Normal instructions can be commuted the obvious way. 
if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec) return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because // changing the relative order of the mask operands might change what happens // to the high-bits of the mask (and, thus, the result). // Cannot commute if it has a non-zero rotate count. if (MI.getOperand(3).getImm() != 0) return nullptr; // If we have a zero rotate count, we have: // M = mask(MB,ME) // Op0 = (Op1 & ~M) | (Op2 & M) // Change this to: // M = mask((ME+1)&31, (MB-1)&31) // Op0 = (Op2 & ~M) | (Op1 & M) // Swap op1/op2 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec."); Register Reg0 = MI.getOperand(0).getReg(); Register Reg1 = MI.getOperand(1).getReg(); Register Reg2 = MI.getOperand(2).getReg(); unsigned SubReg1 = MI.getOperand(1).getSubReg(); unsigned SubReg2 = MI.getOperand(2).getSubReg(); bool Reg1IsKill = MI.getOperand(1).isKill(); bool Reg2IsKill = MI.getOperand(2).isKill(); bool ChangeReg0 = false; // If machine instrs are no longer in two-address forms, update // destination register as well. if (Reg0 == Reg1) { // Must be two address instruction! assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); Reg2IsKill = false; ChangeReg0 = true; } // Masks. unsigned MB = MI.getOperand(4).getImm(); unsigned ME = MI.getOperand(5).getImm(); // We can't commute a trivial mask (there is no way to represent an all-zero // mask). if (MB == 0 && ME == 31) return nullptr; if (NewMI) { // Create a new instruction. Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg(); bool Reg0IsDead = MI.getOperand(0).isDead(); return BuildMI(MF, MI.getDebugLoc(), MI.getDesc()) .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) .addReg(Reg2, getKillRegState(Reg2IsKill)) .addReg(Reg1, getKillRegState(Reg1IsKill)) .addImm((ME + 1) & 31) .addImm((MB - 1) & 31); } if (ChangeReg0) { MI.getOperand(0).setReg(Reg2); MI.getOperand(0).setSubReg(SubReg2); } MI.getOperand(2).setReg(Reg1); MI.getOperand(1).setReg(Reg2); MI.getOperand(2).setSubReg(SubReg1); MI.getOperand(1).setSubReg(SubReg2); MI.getOperand(2).setIsKill(Reg1IsKill); MI.getOperand(1).setIsKill(Reg2IsKill); // Swap the mask around. MI.getOperand(4).setImm((ME + 1) & 31); MI.getOperand(5).setImm((MB - 1) & 31); return &MI; } bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { // For VSX A-Type FMA instructions, it is the first two operands that can be // commuted, however, because the non-encoded tied input operand is listed // first, the operands to swap are actually the second and third. int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode()); if (AltOpc == -1) return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1 // and SrcOpIdx2. return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); } void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { // This function is used for scheduling, and the nop wanted here is the type // that terminates dispatch groups on the POWER cores. 
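// A minimal standalone sketch (not part of the patch) of the mask identity the
// zero-rotate RLWIMI commutation in commuteInstructionImpl() above relies on:
// for any non-trivial mask, mask((ME + 1) & 31, (MB - 1) & 31) == ~mask(MB, ME),
// so swapping Op1/Op2 together with the mask bounds leaves
//   Op0 = (Op1 & ~M) | (Op2 & M)
// unchanged.
static unsigned ppcMask32(unsigned MB, unsigned ME) {
  // Wrapping PowerPC mask in IBM bit numbering (bit 0 is the most significant).
  unsigned M = 0;
  for (unsigned i = MB;; i = (i + 1) & 31) {
    M |= 0x80000000u >> i;
    if (i == ME)
      break;
  }
  return M;
}
// For MB = 8, ME = 23:  ppcMask32(8, 23) == 0x00FFFF00 and
//                       ppcMask32(24, 7) == 0xFF0000FF == ~0x00FFFF00.
// The identity fails only for MB = 0, ME = 31 (the all-ones mask), which is
// exactly the case the code above refuses to commute.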
unsigned Directive = Subtarget.getCPUDirective(); unsigned Opcode; switch (Directive) { default: Opcode = PPC::NOP; break; case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ // FIXME: Update when POWER9 scheduling model is ready. case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break; } DebugLoc DL; BuildMI(MBB, MI, DL, get(Opcode)); } /// Return the noop instruction to use for a noop. void PPCInstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(PPC::NOP); } // Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { bool isPPC64 = Subtarget.isPPC64(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) return false; if (!isUnpredicatedTerminator(*I)) return false; if (AllowModify) { // If the BB ends with an unconditional branch to the fallthrough BB, // we eliminate the branch instruction. if (I->getOpcode() == PPC::B && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { I->eraseFromParent(); // We update iterator after deleting the last branch. I = MBB.getLastNonDebugInstr(); if (I == MBB.end() || !isUnpredicatedTerminator(*I)) return false; } } // Get the last instruction in the block. MachineInstr &LastInst = *I; // If there is only one terminator instruction, process it. if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { if (LastInst.getOpcode() == PPC::B) { if (!LastInst.getOperand(0).isMBB()) return true; TBB = LastInst.getOperand(0).getMBB(); return false; } else if (LastInst.getOpcode() == PPC::BCC) { if (!LastInst.getOperand(2).isMBB()) return true; // Block ends with fall-through condbranch. TBB = LastInst.getOperand(2).getMBB(); Cond.push_back(LastInst.getOperand(0)); Cond.push_back(LastInst.getOperand(1)); return false; } else if (LastInst.getOpcode() == PPC::BC) { if (!LastInst.getOperand(1).isMBB()) return true; // Block ends with fall-through condbranch. TBB = LastInst.getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(LastInst.getOperand(0)); return false; } else if (LastInst.getOpcode() == PPC::BCn) { if (!LastInst.getOperand(1).isMBB()) return true; // Block ends with fall-through condbranch. TBB = LastInst.getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); Cond.push_back(LastInst.getOperand(0)); return false; } else if (LastInst.getOpcode() == PPC::BDNZ8 || LastInst.getOpcode() == PPC::BDNZ) { if (!LastInst.getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst.getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } else if (LastInst.getOpcode() == PPC::BDZ8 || LastInst.getOpcode() == PPC::BDZ) { if (!LastInst.getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = LastInst.getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); return false; } // Otherwise, don't know what this is. 
return true; } // Get the instruction before it if it's a terminator. MachineInstr &SecondLastInst = *I; // If there are three terminators, we don't know what sort of block this is. if (I != MBB.begin() && isUnpredicatedTerminator(*--I)) return true; // If the block ends with PPC::B and PPC:BCC, handle it. if (SecondLastInst.getOpcode() == PPC::BCC && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(2).isMBB() || !LastInst.getOperand(0).isMBB()) return true; TBB = SecondLastInst.getOperand(2).getMBB(); Cond.push_back(SecondLastInst.getOperand(0)); Cond.push_back(SecondLastInst.getOperand(1)); FBB = LastInst.getOperand(0).getMBB(); return false; } else if (SecondLastInst.getOpcode() == PPC::BC && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(1).isMBB() || !LastInst.getOperand(0).isMBB()) return true; TBB = SecondLastInst.getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(SecondLastInst.getOperand(0)); FBB = LastInst.getOperand(0).getMBB(); return false; } else if (SecondLastInst.getOpcode() == PPC::BCn && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(1).isMBB() || !LastInst.getOperand(0).isMBB()) return true; TBB = SecondLastInst.getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); Cond.push_back(SecondLastInst.getOperand(0)); FBB = LastInst.getOperand(0).getMBB(); return false; } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 || SecondLastInst.getOpcode() == PPC::BDNZ) && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(0).isMBB() || !LastInst.getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst.getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(1)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst.getOperand(0).getMBB(); return false; } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 || SecondLastInst.getOpcode() == PPC::BDZ) && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(0).isMBB() || !LastInst.getOperand(0).isMBB()) return true; if (DisableCTRLoopAnal) return true; TBB = SecondLastInst.getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, true)); FBB = LastInst.getOperand(0).getMBB(); return false; } // If the block ends with two PPC:Bs, handle it. The second one is not // executed, so remove it. if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) { if (!SecondLastInst.getOperand(0).isMBB()) return true; TBB = SecondLastInst.getOperand(0).getMBB(); I = LastInst; if (AllowModify) I->eraseFromParent(); return false; } // Otherwise, can't handle this. return true; } unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) return 0; if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 0; // Remove the branch. 
I->eraseFromParent(); I = MBB.end(); if (I == MBB.begin()) return 1; --I; if (I->getOpcode() != PPC::BCC && I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 1; // Remove the branch. I->eraseFromParent(); return 2; } unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && "PPC branch conditions have two components!"); assert(!BytesAdded && "code size not handled"); bool isPPC64 = Subtarget.isPPC64(); // One-way branch. if (!FBB) { if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_SET) BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()) .add(Cond[1]) .addMBB(TBB); return 1; } // Two-way Conditional Branch. if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_SET) BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); else BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()) .add(Cond[1]) .addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } // Select analysis. bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { if (Cond.size() != 2) return false; // If this is really a bdnz-like condition, then it cannot be turned into a // select. if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) return false; // Check register classes. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); if (!RC) return false; // isel is for regular integer GPRs only. if (!PPC::GPRCRegClass.hasSubClassEq(RC) && !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) && !PPC::G8RCRegClass.hasSubClassEq(RC) && !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) return false; // FIXME: These numbers are for the A2, how well they work for other cores is // an open question. On the A2, the isel instruction has a 2-cycle latency // but single-cycle throughput. These numbers are used in combination with // the MispredictPenalty setting from the active SchedMachineModel. 
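// For reference, the two-operand condition encoding produced by analyzeBranch()
// and consumed by insertBranch() above (and by reverseBranchCondition() below):
//   Cond[0] = predicate imm,                 Cond[1] = CR register  -> BCC
//   Cond[0] = PRED_BIT_SET / PRED_BIT_UNSET, Cond[1] = CR bit       -> BC / BCn
//   Cond[0] = 1 / 0,                         Cond[1] = CTR or CTR8  -> BDNZ[8] / BDZ[8]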
CondCycles = 1; TrueCycles = 1; FalseCycles = 1; return true; } void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &dl, Register DestReg, ArrayRef Cond, Register TrueReg, Register FalseReg) const { assert(Cond.size() == 2 && "PPC branch conditions have two components!"); // Get the register classes. MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); assert(RC && "TrueReg and FalseReg must have overlapping register classes"); bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) || PPC::G8RC_NOX0RegClass.hasSubClassEq(RC); assert((Is64Bit || PPC::GPRCRegClass.hasSubClassEq(RC) || PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) && "isel is for regular integer GPRs only"); unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; auto SelectPred = static_cast(Cond[0].getImm()); unsigned SubIdx = 0; bool SwapOps = false; switch (SelectPred) { case PPC::PRED_EQ: case PPC::PRED_EQ_MINUS: case PPC::PRED_EQ_PLUS: SubIdx = PPC::sub_eq; SwapOps = false; break; case PPC::PRED_NE: case PPC::PRED_NE_MINUS: case PPC::PRED_NE_PLUS: SubIdx = PPC::sub_eq; SwapOps = true; break; case PPC::PRED_LT: case PPC::PRED_LT_MINUS: case PPC::PRED_LT_PLUS: SubIdx = PPC::sub_lt; SwapOps = false; break; case PPC::PRED_GE: case PPC::PRED_GE_MINUS: case PPC::PRED_GE_PLUS: SubIdx = PPC::sub_lt; SwapOps = true; break; case PPC::PRED_GT: case PPC::PRED_GT_MINUS: case PPC::PRED_GT_PLUS: SubIdx = PPC::sub_gt; SwapOps = false; break; case PPC::PRED_LE: case PPC::PRED_LE_MINUS: case PPC::PRED_LE_PLUS: SubIdx = PPC::sub_gt; SwapOps = true; break; case PPC::PRED_UN: case PPC::PRED_UN_MINUS: case PPC::PRED_UN_PLUS: SubIdx = PPC::sub_un; SwapOps = false; break; case PPC::PRED_NU: case PPC::PRED_NU_MINUS: case PPC::PRED_NU_PLUS: SubIdx = PPC::sub_un; SwapOps = true; break; case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } Register FirstReg = SwapOps ? FalseReg : TrueReg, SecondReg = SwapOps ? TrueReg : FalseReg; // The first input register of isel cannot be r0. If it is a member // of a register class that can be r0, then copy it first (the // register allocator should eliminate the copy). if (MRI.getRegClass(FirstReg)->contains(PPC::R0) || MRI.getRegClass(FirstReg)->contains(PPC::X0)) { const TargetRegisterClass *FirstRC = MRI.getRegClass(FirstReg)->contains(PPC::X0) ? 
&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass; Register OldFirstReg = FirstReg; FirstReg = MRI.createVirtualRegister(FirstRC); BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) .addReg(OldFirstReg); } BuildMI(MBB, MI, dl, get(OpCode), DestReg) .addReg(FirstReg).addReg(SecondReg) .addReg(Cond[1].getReg(), 0, SubIdx); } static unsigned getCRBitValue(unsigned CRBit) { unsigned Ret = 4; if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT || CRBit == PPC::CR2LT || CRBit == PPC::CR3LT || CRBit == PPC::CR4LT || CRBit == PPC::CR5LT || CRBit == PPC::CR6LT || CRBit == PPC::CR7LT) Ret = 3; if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT || CRBit == PPC::CR2GT || CRBit == PPC::CR3GT || CRBit == PPC::CR4GT || CRBit == PPC::CR5GT || CRBit == PPC::CR6GT || CRBit == PPC::CR7GT) Ret = 2; if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ || CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ || CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ || CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ) Ret = 1; if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN || CRBit == PPC::CR2UN || CRBit == PPC::CR3UN || CRBit == PPC::CR4UN || CRBit == PPC::CR5UN || CRBit == PPC::CR6UN || CRBit == PPC::CR7UN) Ret = 0; assert(Ret != 4 && "Invalid CR bit register"); return Ret; } void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const { // We can end up with self copies and similar things as a result of VSX copy // legalization. Promote them here. const TargetRegisterInfo *TRI = &getRegisterInfo(); if (PPC::F8RCRegClass.contains(DestReg) && PPC::VSRCRegClass.contains(SrcReg)) { MCRegister SuperReg = TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); if (VSXSelfCopyCrash && SrcReg == SuperReg) llvm_unreachable("nop VSX copy"); DestReg = SuperReg; } else if (PPC::F8RCRegClass.contains(SrcReg) && PPC::VSRCRegClass.contains(DestReg)) { MCRegister SuperReg = TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); if (VSXSelfCopyCrash && DestReg == SuperReg) llvm_unreachable("nop VSX copy"); SrcReg = SuperReg; } // Different class register copy if (PPC::CRBITRCRegClass.contains(SrcReg) && PPC::GPRCRegClass.contains(DestReg)) { MCRegister CRReg = getCRFromCRBit(SrcReg); BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg); getKillRegState(KillSrc); // Rotate the CR bit in the CR fields to be the least significant bit and // then mask with 0x1 (MB = ME = 31). 
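// A minimal standalone sketch (not part of the patch) of the rotate amount used
// by the RLWINM just below: after MFOCRF, CR field F sits in IBM bits
// 4*F .. 4*F+3 of the GPR (LT, GT, EQ, UN in that order), and a rotate left by
// pos+1 brings IBM bit 'pos' into bit 31, where the MB = ME = 31 mask keeps it.
static unsigned crBitRotateAmount(unsigned Field, unsigned CRBitValue) {
  // CRBitValue follows getCRBitValue() above: LT = 3, GT = 2, EQ = 1, UN = 0,
  // so the bit's IBM position is 4*Field + (3 - CRBitValue) and the rotate is:
  return Field * 4 + (4 - CRBitValue);
}
// e.g. CR2EQ: Field = 2, CRBitValue = 1 -> rotate left by 11, which moves IBM
// bit 10 (the EQ bit of CR2) into bit 31 -- the same amount the code below
// computes as getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)).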
BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) .addReg(DestReg, RegState::Kill) .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) .addImm(31) .addImm(31); return; } else if (PPC::CRRCRegClass.contains(SrcReg) && PPC::G8RCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; } else if (PPC::CRRCRegClass.contains(SrcReg) && PPC::GPRCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; } else if (PPC::G8RCRegClass.contains(SrcReg) && PPC::VSFRCRegClass.contains(DestReg)) { assert(Subtarget.hasDirectMove() && "Subtarget doesn't support directmove, don't know how to copy."); BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg); NumGPRtoVSRSpill++; getKillRegState(KillSrc); return; } else if (PPC::VSFRCRegClass.contains(SrcReg) && PPC::G8RCRegClass.contains(DestReg)) { assert(Subtarget.hasDirectMove() && "Subtarget doesn't support directmove, don't know how to copy."); BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; } else if (PPC::SPERCRegClass.contains(SrcReg) && PPC::GPRCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; } else if (PPC::GPRCRegClass.contains(SrcReg) && PPC::SPERCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; } unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR8; else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) Opc = PPC::FMR; else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::MCRF; else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::VOR; else if (PPC::VSRCRegClass.contains(DestReg, SrcReg)) // There are two different ways this can be done: // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only // issue in VSU pipeline 0. // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but // can go to either pipeline. // We'll always use xxlor here, because in practically all cases where // copies are generated, they are close enough to some use that the // lower-latency form is preferable. Opc = PPC::XXLOR; else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || PPC::VSSRCRegClass.contains(DestReg, SrcReg)) Opc = (Subtarget.hasP9Vector()) ? 
PPC::XSCPSGNDP : PPC::XXLORf; else if (PPC::QFRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::QVFMR; else if (PPC::QSRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::QVFMRs; else if (PPC::QBRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else llvm_unreachable("Impossible reg-to-reg copy"); const MCInstrDesc &MCID = get(Opc); if (MCID.getNumOperands() == 3) BuildMI(MBB, I, DL, MCID, DestReg) .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); else BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } static unsigned getSpillIndex(const TargetRegisterClass *RC) { int OpcodeIndex = 0; if (PPC::GPRCRegClass.hasSubClassEq(RC) || PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_Int4Spill; } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_Int8Spill; } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_Float8Spill; } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRBitSpill; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VRVectorSpill; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VSXVectorSpill; } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VectorFloat8Spill; } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VectorFloat4Spill; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VRSaveSpill; } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_QuadFloat8Spill; } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_QuadFloat4Spill; } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_QuadBitSpill; } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SpillToVSR; } else { llvm_unreachable("Unknown regclass!"); } return OpcodeIndex; } unsigned PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const { const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); return OpcodesForSpill[getSpillIndex(RC)]; } unsigned PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const { const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); return OpcodesForSpill[getSpillIndex(RC)]; } void PPCInstrInfo::StoreRegToStackSlot( MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const { unsigned Opcode = getStoreOpcodeForSpill(RC); DebugLoc DL; PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); NewMIs.push_back(addFrameReference( BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); if (PPC::CRRCRegClass.hasSubClassEq(RC) || PPC::CRBITRCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsCR(); if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsVRSAVE(); if (isXFormMemOp(Opcode)) FuncInfo->setHasNonRISpills(); } void PPCInstrInfo::storeRegToStackSlotNoUpd( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction &MF = 
*MBB.getParent(); SmallVector NewMIs; StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = MF.getFrameInfo(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlign(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register // using a VSX instruction. The issue with this is that the VSX // load/store instructions swap the doublewords in the vector and the Altivec // ones don't. The register classes on the spill/reload may be different if // the register is defined using an Altivec instruction and is then used by a // VSX instruction. RC = updatedRC(RC); storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI); } void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const { unsigned Opcode = getLoadOpcodeForSpill(RC); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg), FrameIdx)); PPCFunctionInfo *FuncInfo = MF.getInfo(); if (PPC::CRRCRegClass.hasSubClassEq(RC) || PPC::CRBITRCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsCR(); if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsVRSAVE(); if (isXFormMemOp(Opcode)) FuncInfo->setHasNonRISpills(); } void PPCInstrInfo::loadRegFromStackSlotNoUpd( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = MF.getFrameInfo(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlign(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register // using a VSX instruction. The issue with this is that the VSX // load/store instructions swap the doublewords in the vector and the Altivec // ones don't. The register classes on the spill/reload may be different if // the register is defined using an Altivec instruction and is then used by a // VSX instruction. 
RC = updatedRC(RC); loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI); } bool PPCInstrInfo:: reverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR) Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0); else // Leave the CR# the same, but invert the condition. Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); return false; } // For some instructions, it is legal to fold ZERO into the RA register field. // This function performs that fold by replacing the operand with PPC::ZERO, // it does not consider whether the load immediate zero is no longer in use. bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const { // A zero immediate should always be loaded with a single li. unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != PPC::LI && DefOpc != PPC::LI8) return false; if (!DefMI.getOperand(1).isImm()) return false; if (DefMI.getOperand(1).getImm() != 0) return false; // Note that we cannot here invert the arguments of an isel in order to fold // a ZERO into what is presented as the second argument. All we have here // is the condition bit, and that might come from a CR-logical bit operation. const MCInstrDesc &UseMCID = UseMI.getDesc(); // Only fold into real machine instructions. if (UseMCID.isPseudo()) return false; // We need to find which of the User's operands is to be folded, that will be // the operand that matches the given register ID. unsigned UseIdx; for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx) if (UseMI.getOperand(UseIdx).isReg() && UseMI.getOperand(UseIdx).getReg() == Reg) break; assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI"); assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg"); const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0 // register (which might also be specified as a pointer class kind). if (UseInfo->isLookupPtrRegClass()) { if (UseInfo->RegClass /* Kind */ != 1) return false; } else { if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID && UseInfo->RegClass != PPC::G8RC_NOX0RegClassID) return false; } // Make sure this is not tied to an output register (or otherwise // constrained). This is true for ST?UX registers, for example, which // are tied to their output registers. if (UseInfo->Constraints != 0) return false; MCRegister ZeroReg; if (UseInfo->isLookupPtrRegClass()) { bool isPPC64 = Subtarget.isPPC64(); ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; } else { ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? PPC::ZERO8 : PPC::ZERO; } UseMI.getOperand(UseIdx).setReg(ZeroReg); return true; } // Folds zero into instructions which have a load immediate zero as an operand // but also recognize zero as immediate zero. If the definition of the load // has no more users it is deleted. 
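// A minimal illustration (MIR-style; the opcode and register names are
// hypothetical, not from this file):
//
//   %c0:gprc = LI 0
//   SomeOp %x, %c0        ; this operand is constrained to GPRC_NOR0/G8RC_NOX0
//     -->
//   SomeOp %x, $zero      ; ZERO encodes a literal 0 in that operand field
//
// FoldImmediate() performs the rewrite through onlyFoldImmediate() above and
// then erases the LI once it has no remaining non-debug uses.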
bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg); if (MRI->use_nodbg_empty(Reg)) DefMI.eraseFromParent(); return Changed; } static bool MBBDefinesCTR(MachineBasicBlock &MBB) { for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); I != IE; ++I) if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8)) return true; return false; } // We should make sure that, if we're going to predicate both sides of a // condition (a diamond), that both sides don't define the counter register. We // can predicate counter-decrement-based branches, but while that predicates // the branching, it does not predicate the counter decrement. If we tried to // merge the triangle into one predicated block, we'd decrement the counter // twice. bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, BranchProbability Probability) const { return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); } bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { // The predicated branches are identified by their type, not really by the // explicit presence of a predicate. Furthermore, some of them can be // predicated more than once. Because if conversion won't try to predicate // any instruction which already claims to be predicated (by returning true // here), always return false. In doing so, we let isPredicable() be the // final word on whether not the instruction can be (further) predicated. return false; } bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, ArrayRef Pred) const { unsigned OpC = MI.getOpcode(); if (OpC == PPC::BLR || OpC == PPC::BLR8) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { bool isPPC64 = Subtarget.isPPC64(); MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI.setDesc(get(PPC::BCLR)); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MI.setDesc(get(PPC::BCLRn)); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); } else { MI.setDesc(get(PPC::BCCLR)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .add(Pred[1]); } return true; } else if (OpC == PPC::B) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { bool isPPC64 = Subtarget.isPPC64(); MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? 
PPC::BDZ8 : PPC::BDZ))); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); MI.RemoveOperand(0); MI.setDesc(get(PPC::BC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .add(Pred[1]) .addMBB(MBB); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); MI.RemoveOperand(0); MI.setDesc(get(PPC::BCn)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .add(Pred[1]) .addMBB(MBB); } else { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); MI.RemoveOperand(0); MI.setDesc(get(PPC::BCC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .add(Pred[1]) .addMBB(MBB); } return true; } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL || OpC == PPC::BCTRL8) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; bool isPPC64 = Subtarget.isPPC64(); if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : (setLR ? PPC::BCCTRL : PPC::BCCTR))); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); return true; } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); return true; } MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .add(Pred[1]); return true; } return false; } bool PPCInstrInfo::SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const { assert(Pred1.size() == 2 && "Invalid PPC first predicate"); assert(Pred2.size() == 2 && "Invalid PPC second predicate"); if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR) return false; if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR) return false; // P1 can only subsume P2 if they test the same condition register. if (Pred1[1].getReg() != Pred2[1].getReg()) return false; PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); if (P1 == P2) return true; // Does P1 subsume P2, e.g. GE subsumes GT. if (P1 == PPC::PRED_LE && (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ)) return true; if (P1 == PPC::PRED_GE && (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ)) return true; return false; } bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, std::vector &Pred) const { // Note: At the present time, the contents of Pred from this function is // unused by IfConversion. This implementation follows ARM by pushing the // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of // predicate, instructions defining CTR or CTR8 are also included as // predicate-defining instructions. 
const TargetRegisterClass *RCs[] = { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; bool Found = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { const TargetRegisterClass *RC = RCs[c]; if (MO.isReg()) { if (MO.isDef() && RC->contains(MO.getReg())) { Pred.push_back(MO); Found = true; } } else if (MO.isRegMask()) { for (TargetRegisterClass::iterator I = RC->begin(), IE = RC->end(); I != IE; ++I) if (MO.clobbersPhysReg(*I)) { Pred.push_back(MO); Found = true; } } } } return Found; } bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int &Mask, int &Value) const { unsigned Opc = MI.getOpcode(); switch (Opc) { default: return false; case PPC::CMPWI: case PPC::CMPLWI: case PPC::CMPDI: case PPC::CMPLDI: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; Value = MI.getOperand(2).getImm(); Mask = 0xFFFF; return true; case PPC::CMPW: case PPC::CMPLW: case PPC::CMPD: case PPC::CMPLD: case PPC::FCMPUS: case PPC::FCMPUD: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); Value = 0; Mask = 0; return true; } } bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const { if (DisableCmpOpt) return false; int OpC = CmpInstr.getOpcode(); Register CRReg = CmpInstr.getOperand(0).getReg(); // FP record forms set CR1 based on the exception status bits, not a // comparison with zero. if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) return false; const TargetRegisterInfo *TRI = &getRegisterInfo(); // The record forms set the condition register based on a signed comparison // with zero (so says the ISA manual). This is not as straightforward as it // seems, however, because this is always a 64-bit comparison on PPC64, even // for instructions that are 32-bit in nature (like slw for example). // So, on PPC32, for unsigned comparisons, we can use the record forms only // for equality checks (as those don't depend on the sign). On PPC64, // we are restricted to equality for unsigned 64-bit comparisons and for // signed 32-bit comparisons the applicability is more restricted. bool isPPC64 = Subtarget.isPPC64(); bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; // Look through copies unless that gets us to a physical register. Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); if (ActualSrc.isVirtual()) SrcReg = ActualSrc; // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; bool equalityOnly = false; bool noSub = false; if (isPPC64) { if (is32BitSignedCompare) { // We can perform this optimization only if MI is sign-extending. if (isSignExtended(*MI)) noSub = true; else return false; } else if (is32BitUnsignedCompare) { // We can perform this optimization, equality only, if MI is // zero-extending. if (isZeroExtended(*MI)) { noSub = true; equalityOnly = true; } else return false; } else equalityOnly = is64BitUnsignedCompare; } else equalityOnly = is32BitUnsignedCompare; if (equalityOnly) { // We need to check the uses of the condition register in order to reject // non-equality comparisons. 
for (MachineRegisterInfo::use_instr_iterator I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); unsigned PredCond = PPC::getPredicateCondition(Pred); // We ignore hint bits when checking for non-equality comparisons. if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) return false; } else if (UseMI->getOpcode() == PPC::ISEL || UseMI->getOpcode() == PPC::ISEL8) { unsigned SubIdx = UseMI->getOperand(3).getSubReg(); if (SubIdx != PPC::sub_eq) return false; } else return false; } } MachineBasicBlock::iterator I = CmpInstr; // Scan forward to find the first use of the compare. for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; ++I) { bool FoundUse = false; for (MachineRegisterInfo::use_instr_iterator J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); J != JE; ++J) if (&*J == &*I) { FoundUse = true; break; } if (FoundUse) break; } SmallVector, 4> PredsToUpdate; SmallVector, 4> SubRegsToUpdate; // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = nullptr; // FIXME: Conservatively refuse to convert an instruction which isn't in the // same BB as the comparison. This is to allow the check below to avoid calls // (and other explicit clobbers); instead we should really check for these // more explicitly (in at least a few predecessors). else if (MI->getParent() != CmpInstr.getParent()) return false; else if (Value != 0) { // The record-form instructions set CR bit based on signed comparison // against 0. We try to convert a compare against 1 or -1 into a compare // against 0 to exploit record-form instructions. For example, we change // the condition "greater than -1" into "greater than or equal to 0" // and "less than 1" into "less than or equal to 0". // Since we optimize comparison based on a specific branch condition, // we don't optimize if condition code is used by more than once. if (equalityOnly || !MRI->hasOneUse(CRReg)) return false; MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg); if (UseMI->getOpcode() != PPC::BCC) return false; PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); unsigned PredCond = PPC::getPredicateCondition(Pred); unsigned PredHint = PPC::getPredicateHint(Pred); int16_t Immed = (int16_t)Value; // When modifying the condition in the predicate, we propagate hint bits // from the original predicate to the new one. if (Immed == -1 && PredCond == PPC::PRED_GT) // We convert "greater than -1" into "greater than or equal to 0", // since we are assuming signed comparison by !equalityOnly Pred = PPC::getPredicate(PPC::PRED_GE, PredHint); else if (Immed == -1 && PredCond == PPC::PRED_LE) // We convert "less than or equal to -1" into "less than 0". Pred = PPC::getPredicate(PPC::PRED_LT, PredHint); else if (Immed == 1 && PredCond == PPC::PRED_LT) // We convert "less than 1" into "less than or equal to 0". Pred = PPC::getPredicate(PPC::PRED_LE, PredHint); else if (Immed == 1 && PredCond == PPC::PRED_GE) // We convert "greater than or equal to 1" into "greater than 0". Pred = PPC::getPredicate(PPC::PRED_GT, PredHint); else return false; PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred)); } // Search for Sub. 
--I; // Get ready to iterate backward from CmpInstr. MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin(); for (; I != E && !noSub; --I) { const MachineInstr &Instr = *I; unsigned IOpC = Instr.getOpcode(); if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) || Instr.readsRegister(PPC::CR0, TRI))) // This instruction modifies or uses the record condition register after // the one we want to change. While we could do this transformation, it // would likely not be profitable. This transformation removes one // instruction, and so even forcing RA to generate one move probably // makes it unprofitable. return false; // Check whether CmpInstr can be made redundant by the current instruction. if ((OpC == PPC::CMPW || OpC == PPC::CMPLW || OpC == PPC::CMPD || OpC == PPC::CMPLD) && (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) && ((Instr.getOperand(1).getReg() == SrcReg && Instr.getOperand(2).getReg() == SrcReg2) || (Instr.getOperand(1).getReg() == SrcReg2 && Instr.getOperand(2).getReg() == SrcReg))) { Sub = &*I; break; } if (I == B) // The 'and' is below the comparison instruction. return false; } // Return false if no candidates exist. if (!MI && !Sub) return false; // The single candidate is called MI. if (!MI) MI = Sub; int NewOpC = -1; int MIOpC = MI->getOpcode(); if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec || MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec) NewOpC = MIOpC; else { NewOpC = PPC::getRecordFormOpcode(MIOpC); if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1) NewOpC = MIOpC; } // FIXME: On the non-embedded POWER architectures, only some of the record // forms are fast, and we should use only the fast ones. // The defining instruction has a record form (or is already a record // form). It is possible, however, that we'll need to reverse the condition // code of the users. if (NewOpC == -1) return false; // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the // condition code of these operands will be modified. // Here, Value == 0 means we haven't converted comparison against 1 or -1 to // comparison against 0, which may modify predicate. bool ShouldSwap = false; if (Sub && Value == 0) { ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && Sub->getOperand(2).getReg() == SrcReg; // The operands to subf are the opposite of sub, so only in the fixed-point // case, invert the order. ShouldSwap = !ShouldSwap; } if (ShouldSwap) for (MachineRegisterInfo::use_instr_iterator I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); unsigned PredCond = PPC::getPredicateCondition(Pred); assert((!equalityOnly || PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) && "Invalid predicate for equality-only optimization"); (void)PredCond; // To suppress warning in release build. 
PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), PPC::getSwappedPredicate(Pred))); } else if (UseMI->getOpcode() == PPC::ISEL || UseMI->getOpcode() == PPC::ISEL8) { unsigned NewSubReg = UseMI->getOperand(3).getSubReg(); assert((!equalityOnly || NewSubReg == PPC::sub_eq) && "Invalid CR bit for equality-only optimization"); if (NewSubReg == PPC::sub_lt) NewSubReg = PPC::sub_gt; else if (NewSubReg == PPC::sub_gt) NewSubReg = PPC::sub_lt; SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)), NewSubReg)); } else // We need to abort on a user we don't understand. return false; } assert(!(Value != 0 && ShouldSwap) && "Non-zero immediate support and ShouldSwap" "may conflict in updating predicate"); // Create a new virtual register to hold the value of the CR set by the // record-form instruction. If the instruction was not previously in // record form, then set the kill flag on the CR. CmpInstr.eraseFromParent(); MachineBasicBlock::iterator MII = MI; BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(), get(TargetOpcode::COPY), CRReg) .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); // Even if CR0 register were dead before, it is alive now since the // instruction we just built uses it. MI->clearRegisterDeads(PPC::CR0); if (MIOpC != NewOpC) { // We need to be careful here: we're replacing one instruction with // another, and we need to make sure that we get all of the right // implicit uses and defs. On the other hand, the caller may be holding // an iterator to this instruction, and so we can't delete it (this is // specifically the case if this is the instruction directly after the // compare). // Rotates are expensive instructions. If we're emitting a record-form // rotate that can just be an andi/andis, we should just emit that. if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { Register GPRRes = MI->getOperand(0).getReg(); int64_t SH = MI->getOperand(2).getImm(); int64_t MB = MI->getOperand(3).getImm(); int64_t ME = MI->getOperand(4).getImm(); // We can only do this if both the start and end of the mask are in the // same halfword. bool MBInLoHWord = MB >= 16; bool MEInLoHWord = ME >= 16; uint64_t Mask = ~0LLU; if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) { Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); // The mask value needs to shift right 16 if we're emitting andis. Mask >>= MBInLoHWord ? 0 : 16; NewOpC = MIOpC == PPC::RLWINM ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec) : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec); } else if (MRI->use_empty(GPRRes) && (ME == 31) && (ME - MB + 1 == SH) && (MB >= 16)) { // If we are rotating by the exact number of bits as are in the mask // and the mask is in the least significant bits of the register, // that's just an andis. (as long as the GPR result has no uses). Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1); Mask >>= 16; NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec; } // If we've set the mask, we can transform. 
if (Mask != ~0LLU) { MI->RemoveOperand(4); MI->RemoveOperand(3); MI->getOperand(2).setImm(Mask); NumRcRotatesConvertedToRcAnd++; } } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) { int64_t MB = MI->getOperand(3).getImm(); if (MB >= 48) { uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; NewOpC = PPC::ANDI8_rec; MI->RemoveOperand(3); MI->getOperand(2).setImm(Mask); NumRcRotatesConvertedToRcAnd++; } } const MCInstrDesc &NewDesc = get(NewOpC); MI->setDesc(NewDesc); if (NewDesc.ImplicitDefs) for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); *ImpDefs; ++ImpDefs) if (!MI->definesRegister(*ImpDefs)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpDefs, true, true)); if (NewDesc.ImplicitUses) for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); *ImpUses; ++ImpUses) if (!MI->readsRegister(*ImpUses)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpUses, false, true)); } assert(MI->definesRegister(PPC::CR0) && "Record-form instruction does not define cr0?"); // Modify the condition code of operands in OperandsToUpdate. // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++) PredsToUpdate[i].first->setImm(PredsToUpdate[i].second); for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++) SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second); return true; } /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } else if (Opcode == TargetOpcode::STACKMAP) { StackMapOpers Opers(&MI); return Opers.getNumPatchBytes(); } else if (Opcode == TargetOpcode::PATCHPOINT) { PatchPointOpers Opers(&MI); return Opers.getNumPatchBytes(); } else { return get(Opcode).getSize(); } } std::pair PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { const unsigned Mask = PPCII::MO_ACCESS_MASK; return std::make_pair(TF & Mask, TF & ~Mask); } ArrayRef> PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace PPCII; static const std::pair TargetFlags[] = { {MO_LO, "ppc-lo"}, {MO_HA, "ppc-ha"}, {MO_TPREL_LO, "ppc-tprel-lo"}, {MO_TPREL_HA, "ppc-tprel-ha"}, {MO_DTPREL_LO, "ppc-dtprel-lo"}, {MO_TLSLD_LO, "ppc-tlsld-lo"}, {MO_TOC_LO, "ppc-toc-lo"}, {MO_TLS, "ppc-tls"}}; return makeArrayRef(TargetFlags); } ArrayRef> PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace PPCII; static const std::pair TargetFlags[] = { {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, {MO_PCREL_FLAG, "ppc-pcrel"}, {MO_GOT_FLAG, "ppc-got"}}; return makeArrayRef(TargetFlags); } // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. // The VSX versions have the advantage of a full 64-register target whereas // the FP ones have the advantage of lower latency and higher throughput. So // what we are after is using the faster instructions in low register pressure // situations and using the larger register file in high register pressure // situations. 
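// For example (illustrative, assuming post-RA physical registers):
//   DFLOADf64 %f1, 0, %r3   is lowered to   LFD %f1, 0(%r3)    (FPR target)
//   DFLOADf64 %v2, 0, %r3   is lowered to   LXSD %v2, 0(%r3)   (VR target)
// so the register allocator's choice of register class selects the opcode.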
bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: UpperOpcode = PPC::LXSSP; LowerOpcode = PPC::LFS; break; case PPC::DFLOADf64: UpperOpcode = PPC::LXSD; LowerOpcode = PPC::LFD; break; case PPC::DFSTOREf32: UpperOpcode = PPC::STXSSP; LowerOpcode = PPC::STFS; break; case PPC::DFSTOREf64: UpperOpcode = PPC::STXSD; LowerOpcode = PPC::STFD; break; case PPC::XFLOADf32: UpperOpcode = PPC::LXSSPX; LowerOpcode = PPC::LFSX; break; case PPC::XFLOADf64: UpperOpcode = PPC::LXSDX; LowerOpcode = PPC::LFDX; break; case PPC::XFSTOREf32: UpperOpcode = PPC::STXSSPX; LowerOpcode = PPC::STFSX; break; case PPC::XFSTOREf64: UpperOpcode = PPC::STXSDX; LowerOpcode = PPC::STFDX; break; case PPC::LIWAX: UpperOpcode = PPC::LXSIWAX; LowerOpcode = PPC::LFIWAX; break; case PPC::LIWZX: UpperOpcode = PPC::LXSIWZX; LowerOpcode = PPC::LFIWZX; break; case PPC::STIWX: UpperOpcode = PPC::STXSIWX; LowerOpcode = PPC::STFIWX; break; default: llvm_unreachable("Unknown Operation!"); } Register TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) Opcode = LowerOpcode; else Opcode = UpperOpcode; MI.setDesc(get(Opcode)); return true; } static bool isAnImmediateOperand(const MachineOperand &MO) { return MO.isCPI() || MO.isGlobal() || MO.isImm(); } bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { auto &MBB = *MI.getParent(); auto DL = MI.getDebugLoc(); switch (MI.getOpcode()) { case TargetOpcode::LOAD_STACK_GUARD: { assert(Subtarget.isTargetLinux() && "Only Linux target is expected to contain LOAD_STACK_GUARD"); const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Offset) .addReg(Reg); return true; } case PPC::DFLOADf32: case PPC::DFLOADf64: case PPC::DFSTOREf32: case PPC::DFSTOREf64: { assert(Subtarget.hasP9Vector() && "Invalid D-Form Pseudo-ops on Pre-P9 target."); assert(MI.getOperand(2).isReg() && isAnImmediateOperand(MI.getOperand(1)) && "D-form op must have register and immediate operands"); return expandVSXMemPseudo(MI); } case PPC::XFLOADf32: case PPC::XFSTOREf32: case PPC::LIWAX: case PPC::LIWZX: case PPC::STIWX: { assert(Subtarget.hasP8Vector() && "Invalid X-Form Pseudo-ops on Pre-P8 target."); assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && "X-form op must have register and register operands"); return expandVSXMemPseudo(MI); } case PPC::XFLOADf64: case PPC::XFSTOREf64: { assert(Subtarget.hasVSX() && "Invalid X-Form Pseudo-ops on target that has no VSX."); assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && "X-form op must have register and register operands"); return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) { MI.setDesc(get(PPC::DFLOADf64)); return expandPostRAPseudo(MI); } else MI.setDesc(get(PPC::LD)); return true; } case PPC::SPILLTOVSR_ST: { Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::DFSTOREf64)); return expandPostRAPseudo(MI); } else { NumStoreSPILLVSRRCAsGpr++; MI.setDesc(get(PPC::STD)); } return true; } case PPC::SPILLTOVSR_LDX: { Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) MI.setDesc(get(PPC::LXSDX)); else MI.setDesc(get(PPC::LDX)); return true; } case PPC::SPILLTOVSR_STX: { Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::STXSDX)); } else { NumStoreSPILLVSRRCAsGpr++; MI.setDesc(get(PPC::STDX)); } return true; } case PPC::CFENCE8: { auto Val = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) .addImm(PPC::PRED_NE_MINUS) .addReg(PPC::CR7) .addImm(1); MI.setDesc(get(PPC::ISYNC)); MI.RemoveOperand(0); return true; } } return false; } // Essentially a compile-time implementation of a compare->isel sequence. // It takes two constants to compare, along with the true/false registers // and the comparison type (as a subreg to a CR field) and returns one // of the true/false registers, depending on the comparison results. static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg) { // Signed comparisons. The immediates are assumed to be sign-extended. if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) { switch (CRSubReg) { default: llvm_unreachable("Unknown integer comparison type."); case PPC::sub_lt: return Imm1 < Imm2 ? TrueReg : FalseReg; case PPC::sub_gt: return Imm1 > Imm2 ? TrueReg : FalseReg; case PPC::sub_eq: return Imm1 == Imm2 ? TrueReg : FalseReg; } } // Unsigned comparisons. else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) { switch (CRSubReg) { default: llvm_unreachable("Unknown integer comparison type."); case PPC::sub_lt: return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg; case PPC::sub_gt: return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg; case PPC::sub_eq: return Imm1 == Imm2 ? 
TrueReg : FalseReg; } } return PPC::NoRegister; } void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const { assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG"); // Replace the REG with the Immediate. Register InUseReg = MI.getOperand(OpNo).getReg(); MI.getOperand(OpNo).ChangeToImmediate(Imm); if (MI.implicit_operands().empty()) return; // We need to make sure that the MI didn't have any implicit use // of this REG any more. const TargetRegisterInfo *TRI = &getRegisterInfo(); int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI); if (UseOpIdx >= 0) { MachineOperand &MO = MI.getOperand(UseOpIdx); if (MO.isImplicit()) // The operands must always be in the following order: // - explicit reg defs, // - other explicit operands (reg uses, immediates, etc.), // - implicit reg defs // - implicit reg uses // Therefore, removing the implicit operand won't change the explicit // operands layout. MI.RemoveOperand(UseOpIdx); } } // Replace an instruction with one that materializes a constant (and sets // CR0 if the original instruction was a record-form instruction). void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const { // Remove existing operands. int OperandToKeep = LII.SetCR ? 1 : 0; for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--) MI.RemoveOperand(i); // Replace the instruction. if (LII.SetCR) { MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); // Set the immediate. MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine); return; } else MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI)); // Set the immediate. MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(LII.Imm); } MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const { assert(!MI.getParent()->getParent()->getRegInfo().isSSA() && "Should be called after register allocation."); const TargetRegisterInfo *TRI = &getRegisterInfo(); MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; It++; SeenIntermediateUse = false; for (; It != E; ++It) { if (It->modifiesRegister(Reg, TRI)) return &*It; if (It->readsRegister(Reg, TRI)) SeenIntermediateUse = true; } return nullptr; } MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineInstr &MI, unsigned &OpNoForForwarding, bool &SeenIntermediateUse) const { OpNoForForwarding = ~0U; MachineInstr *DefMI = nullptr; MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); const TargetRegisterInfo *TRI = &getRegisterInfo(); // If we're in SSA, get the defs through the MRI. Otherwise, only look // within the basic block to see if the register is defined using an // LI/LI8/ADDI/ADDI8. if (MRI->isSSA()) { for (int i = 1, e = MI.getNumOperands(); i < e; i++) { if (!MI.getOperand(i).isReg()) continue; Register Reg = MI.getOperand(i).getReg(); if (!Register::isVirtualRegister(Reg)) continue; unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI); if (Register::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 || DefMI->getOpcode() == PPC::ADDI || DefMI->getOpcode() == PPC::ADDI8) { OpNoForForwarding = i; // The ADDI and LI operand maybe exist in one instruction at same // time. we prefer to fold LI operand as LI only has one Imm operand // and is more possible to be converted. So if current DefMI is // ADDI/ADDI8, we continue to find possible LI/LI8. 
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) break; } } } } else { // Looking back through the definition for each operand could be expensive, // so exit early if this isn't an instruction that either has an immediate // form or is already an immediate form that we can handle. ImmInstrInfo III; unsigned Opc = MI.getOpcode(); bool ConvertibleImmForm = Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI || Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 || Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI || Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec || Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg()) ? isVFRegister(MI.getOperand(0).getReg()) : false; if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true)) return nullptr; // Don't convert or %X, %Y, %Y since that's just a register move. if ((Opc == PPC::OR || Opc == PPC::OR8) && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) return nullptr; for (int i = 1, e = MI.getNumOperands(); i < e; i++) { MachineOperand &MO = MI.getOperand(i); SeenIntermediateUse = false; if (MO.isReg() && MO.isUse() && !MO.isImplicit()) { Register Reg = MI.getOperand(i).getReg(); // If we see another use of this reg between the def and the MI, // we want to flat it so the def isn't deleted. MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse); if (DefMI) { // Is this register defined by some form of add-immediate (including // load-immediate) within this basic block? switch (DefMI->getOpcode()) { default: break; case PPC::LI: case PPC::LI8: case PPC::ADDItocL: case PPC::ADDI: case PPC::ADDI8: OpNoForForwarding = i; return DefMI; } } } } } return OpNoForForwarding == ~0U ? nullptr : DefMI; } unsigned PPCInstrInfo::getSpillTarget() const { return Subtarget.hasP9Vector() ? 1 : 0; } const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { return StoreSpillOpcodesArray[getSpillTarget()]; } const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { return LoadSpillOpcodesArray[getSpillTarget()]; } void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, unsigned RegNo) const { + // Conservatively clear kill flag for the register if the instructions are in + // different basic blocks and in SSA form, because the kill flag may no longer + // be right. There is no need to bother with dead flags since defs with no + // uses will be handled by DCE. + MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) { + MRI.clearKillFlags(RegNo); + return; + } // Instructions between [StartMI, EndMI] should be in same basic block. assert((StartMI.getParent() == EndMI.getParent()) && "Instructions are not in same basic block"); bool IsKillSet = false; auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) { MachineOperand &MO = MI.getOperand(Index); if (MO.isReg() && MO.isUse() && MO.isKill() && getRegisterInfo().regsOverlap(MO.getReg(), RegNo)) MO.setIsKill(false); }; // Set killed flag for EndMI. // No need to do anything if EndMI defines RegNo. int UseIndex = EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo()); if (UseIndex != -1) { EndMI.getOperand(UseIndex).setIsKill(true); IsKillSet = true; // Clear killed flag for other EndMI operands related to RegNo. 
In some
  // unexpected cases, killed may be set multiple times for the same register
  // operand in the same MI.
  for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
    if (i != UseIndex)
      clearOperandKillInfo(EndMI, i);
  }

  // Walking the instructions in reverse order (EndMI -> StartMI].
  MachineBasicBlock::reverse_iterator It = EndMI;
  MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
  // EndMI has been handled above, skip it here.
  It++;
  MachineOperand *MO = nullptr;
  for (; It != E; ++It) {
    // Skip instructions which could not be a def/use of RegNo.
    if (It->isDebugInstr() || It->isPosition())
      continue;
    // Clear killed flag for all It operands related to RegNo. In some
    // unexpected cases, killed may be set multiple times for the same register
    // operand in the same MI.
    for (int i = 0, e = It->getNumOperands(); i != e; ++i)
      clearOperandKillInfo(*It, i);
    // If killed is not set, set killed for its last use or set dead for its
    // def if no use is found.
    if (!IsKillSet) {
      if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
        // Use found, set it killed.
        IsKillSet = true;
        MO->setIsKill(true);
        continue;
      } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
                                                  &getRegisterInfo()))) {
        // No use found, set dead for its def.
        assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
        MO->setIsDead(true);
        break;
      }
    }

    if ((&*It) == &StartMI)
      break;
  }
  // Ensure RegNo liveness is killed after EndMI.
  assert((IsKillSet || (MO && MO->isDead())) &&
         "RegNo should be killed or dead");
}

// This opt tries to convert the following imm form to an index form to save an
// add for stack variables.
// Return false if no such pattern is found.
//
// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
// ADD instr:  ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
// Imm instr:  Reg            = op OffsetImm, ToBeDeletedReg(killed)
//
// can be converted to:
//
// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
// Index instr:    Reg            = opx ScaleReg, ToBeChangedReg(killed)
//
// In order to eliminate the ADD instr, make sure that:
// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
//    the new ADDI instr and ADDI can only take an int16 Imm.
// 2: ToBeChangedReg must be killed in the ADD instr and there is no other use
//    between ADDI and ADD instr since its original def in ADDI will be changed
//    in the new ADDI instr. Also, there should be no new def for it between
//    ADD and Imm instr as ToBeChangedReg will be used in the Index instr.
// 3: ToBeDeletedReg must be killed in the Imm instr and there is no other use
//    between ADD and Imm instr since the ADD instr will be eliminated.
// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
//    moved to the Index instr.
bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  // Do this opt after PEI, which is after RA. The reason is that stack slot
  // expansion in PEI may expose such opportunities since in PEI, stack slot
  // offsets to the frame base (OffsetAddi) are determined.
  if (!PostRA)
    return false;
  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;
  // Check if the Imm instr meets the requirement.
if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm, III)) return false; bool OtherIntermediateUse = false; MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse); // Exit if there is other use between ADD and Imm instr or no def found. if (OtherIntermediateUse || !ADDMI) return false; // Check if ADD instr meets requirement. if (!isADDInstrEligibleForFolding(*ADDMI)) return false; unsigned ScaleRegIdx = 0; int64_t OffsetAddi = 0; MachineInstr *ADDIMI = nullptr; // Check if there is a valid ToBeChangedReg in ADDMI. // 1: It must be killed. // 2: Its definition must be a valid ADDIMI. // 3: It must satify int16 offset requirement. if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm)) ScaleRegIdx = 2; else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm)) ScaleRegIdx = 1; else return false; assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg."); unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg(); unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg(); auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End) { for (auto It = ++Start; It != End; It++) if (It->modifiesRegister(Reg, &getRegisterInfo())) return true; return false; }; // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is // treated as special zero when ScaleReg is R0/X0 register. if (III.ZeroIsSpecialOrig == III.ImmOpNo && (ScaleReg == PPC::R0 || ScaleReg == PPC::X0)) return false; // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr // and Imm Instr. if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI)) return false; // Now start to do the transformation. LLVM_DEBUG(dbgs() << "Replace instruction: " << "\n"); LLVM_DEBUG(ADDIMI->dump()); LLVM_DEBUG(ADDMI->dump()); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "with: " << "\n"); // Update ADDI instr. ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm); // Update Imm instr. MI.setDesc(get(XFormOpcode)); MI.getOperand(III.ImmOpNo) .ChangeToRegister(ScaleReg, false, false, ADDMI->getOperand(ScaleRegIdx).isKill()); MI.getOperand(III.OpNoForForwarding) .ChangeToRegister(ToBeChangedReg, false, false, true); // Eliminate ADD instr. ADDMI->eraseFromParent(); LLVM_DEBUG(ADDIMI->dump()); LLVM_DEBUG(MI.dump()); return true; } bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const { unsigned Opc = ADDIMI.getOpcode(); // Exit if the instruction is not ADDI. if (Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; // The operand may not necessarily be an immediate - it could be a relocation. if (!ADDIMI.getOperand(2).isImm()) return false; Imm = ADDIMI.getOperand(2).getImm(); return true; } bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const { unsigned Opc = ADDMI.getOpcode(); // Exit if the instruction is not ADD. return Opc == PPC::ADD4 || Opc == PPC::ADD8; } bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &ToBeDeletedReg, unsigned &XFormOpcode, int64_t &OffsetImm, ImmInstrInfo &III) const { // Only handle load/store. if (!MI.mayLoadOrStore()) return false; unsigned Opc = MI.getOpcode(); XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc); // Exit if instruction has no index form. if (XFormOpcode == PPC::INSTRUCTION_LIST_END) return false; // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap. 
if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()), III, true)) return false; if (!III.IsSummingOperands) return false; MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo); MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding); // Only support imm operands, not relocation slots or others. if (!ImmOperand.isImm()) return false; assert(RegOperand.isReg() && "Instruction format is not right"); // There are other use for ToBeDeletedReg after Imm instr, can not delete it. if (!RegOperand.isKill()) return false; ToBeDeletedReg = RegOperand.getReg(); OffsetImm = ImmOperand.getImm(); return true; } bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const { assert((Index == 1 || Index == 2) && "Invalid operand index for add."); MachineOperand &MO = ADDMI->getOperand(Index); if (!MO.isKill()) return false; bool OtherIntermediateUse = false; ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse); // Currently handle only one "add + Imminstr" pair case, exit if other // intermediate use for ToBeChangedReg found. // TODO: handle the cases where there are other "add + Imminstr" pairs // with same offset in Imminstr which is like: // // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed) // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed) // // can be converted to: // // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, // (OffsetAddi + OffsetImm) // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed) if (OtherIntermediateUse || !ADDIMI) return false; // Check if ADDI instr meets requirement. if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi)) return false; if (isInt<16>(OffsetAddi + OffsetImm)) return true; return false; } // If this instruction has an immediate form and one of its operands is a // result of a load-immediate or an add-immediate, convert it to // the immediate form if the constant is in range. bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef) const { MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); bool PostRA = !MRI->isSSA(); bool SeenIntermediateUse = true; unsigned ForwardingOperand = ~0U; MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand, SeenIntermediateUse); if (!DefMI) return false; assert(ForwardingOperand < MI.getNumOperands() && "The forwarding operand needs to be valid at this point"); bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; // If this is a imm instruction and its register operands is produced by ADDI, // put the imm into imm inst directly. if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) != PPC::INSTRUCTION_LIST_END && transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand)) return true; ImmInstrInfo III; bool IsVFReg = MI.getOperand(0).isReg() ? isVFRegister(MI.getOperand(0).getReg()) : false; bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA); // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. 
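  // For example (illustrative):
  //   x = addi r3, 24
  //   r4 = lwzx 0, x    ->    r4 = lwz 24(r3)
  // provided the conditions checked below are met.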
if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI, KillFwdDefMI)) return true; // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by LI, convert it now. if (HasImmForm && transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI)) return true; // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI // can be simpified to LI. if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef)) return true; return false; } bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const { // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. A marked exception // are the update form loads/stores for which a constant operand 2 would need // to turn into a displacement and move operand 1 to the operand 2 position. III.ImmOpNo = 2; III.OpNoForForwarding = 2; III.ImmWidth = 16; III.ImmMustBeMultipleOf = 1; III.TruncateImmTo = 0; III.IsSummingOperands = false; switch (Opc) { default: return false; case PPC::ADD4: case PPC::ADD8: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 1; III.IsCommutative = true; III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8; break; case PPC::ADDC: case PPC::ADDC8: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8; break; case PPC::ADDC_rec: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; III.IsSummingOperands = true; III.ImmOpcode = PPC::ADDIC_rec; break; case PPC::SUBFC: case PPC::SUBFC8: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = false; III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8; break; case PPC::CMPW: case PPC::CMPD: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = false; III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI; break; case PPC::CMPLW: case PPC::CMPLD: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = false; III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI; break; case PPC::AND_rec: case PPC::AND8_rec: case PPC::OR: case PPC::OR8: case PPC::XOR: case PPC::XOR8: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::AND_rec: III.ImmOpcode = PPC::ANDI_rec; break; case PPC::AND8_rec: III.ImmOpcode = PPC::ANDI8_rec; break; case PPC::OR: III.ImmOpcode = PPC::ORI; break; case PPC::OR8: III.ImmOpcode = PPC::ORI8; break; case PPC::XOR: III.ImmOpcode = PPC::XORI; break; case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break; } break; case PPC::RLWNM: case PPC::RLWNM8: case PPC::RLWNM_rec: case PPC::RLWNM8_rec: case PPC::SLW: case PPC::SLW8: case PPC::SLW_rec: case PPC::SLW8_rec: case PPC::SRW: case PPC::SRW8: case PPC::SRW_rec: case PPC::SRW8_rec: case PPC::SRAW: case PPC::SRAW_rec: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = false; // This isn't actually true, but the instructions ignore any of the // upper bits, so any immediate loaded with an LI is acceptable. 
// This does not apply to shift right algebraic because a value // out of range will produce a -1/0. III.ImmWidth = 16; if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec || Opc == PPC::RLWNM8_rec) III.TruncateImmTo = 5; else III.TruncateImmTo = 6; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break; case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break; case PPC::RLWNM_rec: III.ImmOpcode = PPC::RLWINM_rec; break; case PPC::RLWNM8_rec: III.ImmOpcode = PPC::RLWINM8_rec; break; case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break; case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break; case PPC::SLW_rec: III.ImmOpcode = PPC::RLWINM_rec; break; case PPC::SLW8_rec: III.ImmOpcode = PPC::RLWINM8_rec; break; case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break; case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break; case PPC::SRW_rec: III.ImmOpcode = PPC::RLWINM_rec; break; case PPC::SRW8_rec: III.ImmOpcode = PPC::RLWINM8_rec; break; case PPC::SRAW: III.ImmWidth = 5; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRAWI; break; case PPC::SRAW_rec: III.ImmWidth = 5; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRAWI_rec; break; } break; case PPC::RLDCL: case PPC::RLDCL_rec: case PPC::RLDCR: case PPC::RLDCR_rec: case PPC::SLD: case PPC::SLD_rec: case PPC::SRD: case PPC::SRD_rec: case PPC::SRAD: case PPC::SRAD_rec: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = false; // This isn't actually true, but the instructions ignore any of the // upper bits, so any immediate loaded with an LI is acceptable. // This does not apply to shift right algebraic because a value // out of range will produce a -1/0. III.ImmWidth = 16; if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR || Opc == PPC::RLDCR_rec) III.TruncateImmTo = 6; else III.TruncateImmTo = 7; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break; case PPC::RLDCL_rec: III.ImmOpcode = PPC::RLDICL_rec; break; case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break; case PPC::RLDCR_rec: III.ImmOpcode = PPC::RLDICR_rec; break; case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break; case PPC::SLD_rec: III.ImmOpcode = PPC::RLDICR_rec; break; case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break; case PPC::SRD_rec: III.ImmOpcode = PPC::RLDICL_rec; break; case PPC::SRAD: III.ImmWidth = 6; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRADI; break; case PPC::SRAD_rec: III.ImmWidth = 6; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRADI_rec; break; } break; // Loads and stores: case PPC::LBZX: case PPC::LBZX8: case PPC::LHZX: case PPC::LHZX8: case PPC::LHAX: case PPC::LHAX8: case PPC::LWZX: case PPC::LWZX8: case PPC::LWAX: case PPC::LDX: case PPC::LFSX: case PPC::LFDX: case PPC::STBX: case PPC::STBX8: case PPC::STHX: case PPC::STHX8: case PPC::STWX: case PPC::STWX8: case PPC::STDX: case PPC::STFSX: case PPC::STFDX: III.SignedImm = true; III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; III.IsSummingOperands = true; III.ImmOpNo = 1; III.OpNoForForwarding = 2; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break; case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break; case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break; case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break; case PPC::LHAX: III.ImmOpcode = PPC::LHA; break; case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break; case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break; case 
PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break; case PPC::LWAX: III.ImmOpcode = PPC::LWA; III.ImmMustBeMultipleOf = 4; break; case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break; case PPC::LFSX: III.ImmOpcode = PPC::LFS; break; case PPC::LFDX: III.ImmOpcode = PPC::LFD; break; case PPC::STBX: III.ImmOpcode = PPC::STB; break; case PPC::STBX8: III.ImmOpcode = PPC::STB8; break; case PPC::STHX: III.ImmOpcode = PPC::STH; break; case PPC::STHX8: III.ImmOpcode = PPC::STH8; break; case PPC::STWX: III.ImmOpcode = PPC::STW; break; case PPC::STWX8: III.ImmOpcode = PPC::STW8; break; case PPC::STDX: III.ImmOpcode = PPC::STD; III.ImmMustBeMultipleOf = 4; break; case PPC::STFSX: III.ImmOpcode = PPC::STFS; break; case PPC::STFDX: III.ImmOpcode = PPC::STFD; break; } break; case PPC::LBZUX: case PPC::LBZUX8: case PPC::LHZUX: case PPC::LHZUX8: case PPC::LHAUX: case PPC::LHAUX8: case PPC::LWZUX: case PPC::LWZUX8: case PPC::LDUX: case PPC::LFSUX: case PPC::LFDUX: case PPC::STBUX: case PPC::STBUX8: case PPC::STHUX: case PPC::STHUX8: case PPC::STWUX: case PPC::STWUX8: case PPC::STDUX: case PPC::STFSUX: case PPC::STFDUX: III.SignedImm = true; III.ZeroIsSpecialOrig = 2; III.ZeroIsSpecialNew = 3; III.IsCommutative = false; III.IsSummingOperands = true; III.ImmOpNo = 2; III.OpNoForForwarding = 3; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break; case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break; case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break; case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break; case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break; case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break; case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break; case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break; case PPC::LDUX: III.ImmOpcode = PPC::LDU; III.ImmMustBeMultipleOf = 4; break; case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break; case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break; case PPC::STBUX: III.ImmOpcode = PPC::STBU; break; case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break; case PPC::STHUX: III.ImmOpcode = PPC::STHU; break; case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break; case PPC::STWUX: III.ImmOpcode = PPC::STWU; break; case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break; case PPC::STDUX: III.ImmOpcode = PPC::STDU; III.ImmMustBeMultipleOf = 4; break; case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break; case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break; } break; // Power9 and up only. For some of these, the X-Form version has access to all // 64 VSR's whereas the D-Form only has access to the VR's. We replace those // with pseudo-ops pre-ra and for post-ra, we check that the register loaded // into or stored from is one of the VR registers. 
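  // For example (illustrative): LXSDX can address all 64 VSRs, while its
  // D-Form counterpart LXSD only encodes VSR32-VSR63 (the VR half), which is
  // why the pre-RA pseudo and the post-RA register check are needed.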
case PPC::LXVX: case PPC::LXSSPX: case PPC::LXSDX: case PPC::STXVX: case PPC::STXSSPX: case PPC::STXSDX: case PPC::XFLOADf32: case PPC::XFLOADf64: case PPC::XFSTOREf32: case PPC::XFSTOREf64: if (!Subtarget.hasP9Vector()) return false; III.SignedImm = true; III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; III.IsSummingOperands = true; III.ImmOpNo = 1; III.OpNoForForwarding = 2; III.ImmMustBeMultipleOf = 4; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LXVX: III.ImmOpcode = PPC::LXV; III.ImmMustBeMultipleOf = 16; break; case PPC::LXSSPX: if (PostRA) { if (IsVFReg) III.ImmOpcode = PPC::LXSSP; else { III.ImmOpcode = PPC::LFS; III.ImmMustBeMultipleOf = 1; } break; } LLVM_FALLTHROUGH; case PPC::XFLOADf32: III.ImmOpcode = PPC::DFLOADf32; break; case PPC::LXSDX: if (PostRA) { if (IsVFReg) III.ImmOpcode = PPC::LXSD; else { III.ImmOpcode = PPC::LFD; III.ImmMustBeMultipleOf = 1; } break; } LLVM_FALLTHROUGH; case PPC::XFLOADf64: III.ImmOpcode = PPC::DFLOADf64; break; case PPC::STXVX: III.ImmOpcode = PPC::STXV; III.ImmMustBeMultipleOf = 16; break; case PPC::STXSSPX: if (PostRA) { if (IsVFReg) III.ImmOpcode = PPC::STXSSP; else { III.ImmOpcode = PPC::STFS; III.ImmMustBeMultipleOf = 1; } break; } LLVM_FALLTHROUGH; case PPC::XFSTOREf32: III.ImmOpcode = PPC::DFSTOREf32; break; case PPC::STXSDX: if (PostRA) { if (IsVFReg) III.ImmOpcode = PPC::STXSD; else { III.ImmOpcode = PPC::STFD; III.ImmMustBeMultipleOf = 1; } break; } LLVM_FALLTHROUGH; case PPC::XFSTOREf64: III.ImmOpcode = PPC::DFSTOREf64; break; } break; } return true; } // Utility function for swaping two arbitrary operands of an instruction. static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { assert(Op1 != Op2 && "Cannot swap operand with itself."); unsigned MaxOp = std::max(Op1, Op2); unsigned MinOp = std::min(Op1, Op2); MachineOperand MOp1 = MI.getOperand(MinOp); MachineOperand MOp2 = MI.getOperand(MaxOp); MI.RemoveOperand(std::max(Op1, Op2)); MI.RemoveOperand(std::min(Op1, Op2)); // If the operands we are swapping are the two at the end (the common case) // we can just remove both and add them in the opposite order. if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) { MI.addOperand(MOp2); MI.addOperand(MOp1); } else { // Store all operands in a temporary vector, remove them and re-add in the // right order. SmallVector MOps; unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops. for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) { MOps.push_back(MI.getOperand(i)); MI.RemoveOperand(i); } // MOp2 needs to be added next. MI.addOperand(MOp2); // Now add the rest. for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) { if (i == MaxOp) MI.addOperand(MOp1); else { MI.addOperand(MOps.back()); MOps.pop_back(); } } } } // Check if the 'MI' that has the index OpNoForForwarding // meets the requirement described in the ImmInstrInfo. bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding ) const { // As the algorithm of checking for PPC::ZERO/PPC::ZERO8 // would not work pre-RA, we can only do the check post RA. MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); if (MRI.isSSA()) return false; // Cannot do the transform if MI isn't summing the operands. if (!III.IsSummingOperands) return false; // The instruction we are trying to replace must have the ZeroIsSpecialOrig set. 
if (!III.ZeroIsSpecialOrig) return false; // We cannot do the transform if the operand we are trying to replace // isn't the same as the operand the instruction allows. if (OpNoForForwarding != III.OpNoForForwarding) return false; // Check if the instruction we are trying to transform really has // the special zero register as its operand. if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO && MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8) return false; // This machine instruction is convertible if it is, // 1. summing the operands. // 2. one of the operands is special zero register. // 3. the operand we are trying to replace is allowed by the MI. return true; } // Check if the DefMI is the add inst and set the ImmMO and RegMO // accordingly. bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, const ImmInstrInfo &III, MachineOperand *&ImmMO, MachineOperand *&RegMO) const { unsigned Opc = DefMI.getOpcode(); if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; assert(DefMI.getNumOperands() >= 3 && "Add inst must have at least three operands"); RegMO = &DefMI.getOperand(1); ImmMO = &DefMI.getOperand(2); // Before RA, ADDI first operand could be a frame index. if (!RegMO->isReg()) return false; // This DefMI is elgible for forwarding if it is: // 1. add inst // 2. one of the operands is Imm/CPI/Global. return isAnImmediateOperand(*ImmMO); } bool PPCInstrInfo::isRegElgibleForForwarding( const MachineOperand &RegMO, const MachineInstr &DefMI, const MachineInstr &MI, bool KillDefMI, bool &IsFwdFeederRegKilled) const { // x = addi y, imm // ... // z = lfdx 0, x -> z = lfd imm(y) // The Reg "y" can be forwarded to the MI(z) only when there is no DEF // of "y" between the DEF of "x" and "z". // The query is only valid post RA. const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); if (MRI.isSSA()) return false; Register Reg = RegMO.getReg(); // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. MachineBasicBlock::const_reverse_iterator It = MI; MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend(); It++; for (; It != E; ++It) { if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) return false; else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) IsFwdFeederRegKilled = true; // Made it to DefMI without encountering a clobber. if ((&*It) == &DefMI) break; } assert((&*It) == &DefMI && "DefMI is missing"); // If DefMI also defines the register to be forwarded, we can only forward it // if DefMI is being erased. if (DefMI.modifiesRegister(Reg, &getRegisterInfo())) return KillDefMI; return true; } bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, const MachineInstr &DefMI, const ImmInstrInfo &III, int64_t &Imm, int64_t BaseImm) const { assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate"); if (DefMI.getOpcode() == PPC::ADDItocL) { // The operand for ADDItocL is CPI, which isn't imm at compiling time, // However, we know that, it is 16-bit width, and has the alignment of 4. // Check if the instruction met the requirement. if (III.ImmMustBeMultipleOf > 4 || III.TruncateImmTo || III.ImmWidth != 16) return false; // Going from XForm to DForm loads means that the displacement needs to be // not just an immediate but also a multiple of 4, or 16 depending on the // load. A DForm load cannot be represented if it is a multiple of say 2. // XForm loads do not have this restriction. 
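    // For example (illustrative): an addi displacement of 6 cannot be folded
    // from LXSDX into LXSD, because LXSD is a DS-form instruction whose
    // displacement must be a multiple of 4 (and LXV, a DQ-form, needs a
    // multiple of 16).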
if (ImmMO.isGlobal()) { const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout(); if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf) return false; } return true; } if (ImmMO.isImm()) { // It is Imm, we need to check if the Imm fit the range. // Sign-extend to 64-bits. // DefMI may be folded with another imm form instruction, the result Imm is // the sum of Imm of DefMI and BaseImm which is from imm form instruction. Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm); if (Imm % III.ImmMustBeMultipleOf) return false; if (III.TruncateImmTo) Imm &= ((1 << III.TruncateImmTo) - 1); if (III.SignedImm) { APInt ActualValue(64, Imm, true); if (!ActualValue.isSignedIntN(III.ImmWidth)) return false; } else { uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; if ((uint64_t)Imm > UnsignedMax) return false; } } else return false; // This ImmMO is forwarded if it meets the requriement describle // in ImmInstrInfo return true; } bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding, MachineInstr **KilledDef) const { if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || !DefMI.getOperand(1).isImm()) return false; MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); bool PostRA = !MRI->isSSA(); int64_t Immediate = DefMI.getOperand(1).getImm(); // Sign-extend to 64-bits. int64_t SExtImm = SignExtend64<16>(Immediate); bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill(); Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg(); bool ReplaceWithLI = false; bool Is64BitLI = false; int64_t NewImm = 0; bool SetCR = false; unsigned Opc = MI.getOpcode(); switch (Opc) { default: return false; // FIXME: Any branches conditional on such a comparison can be made // unconditional. At this time, this happens too infrequently to be worth // the implementation effort, but if that ever changes, we could convert // such a pattern here. case PPC::CMPWI: case PPC::CMPLWI: case PPC::CMPDI: case PPC::CMPLDI: { // Doing this post-RA would require dataflow analysis to reliably find uses // of the CR register set by the compare. // No need to fixup killed/dead flag since this transformation is only valid // before RA. if (PostRA) return false; // If a compare-immediate is fed by an immediate and is itself an input of // an ISEL (the most common case) into a COPY of the correct register. bool Changed = false; Register DefReg = MI.getOperand(0).getReg(); int64_t Comparand = MI.getOperand(2).getImm(); int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? (Comparand | 0xFFFFFFFFFFFF0000) : Comparand; for (auto &CompareUseMI : MRI->use_instructions(DefReg)) { unsigned UseOpc = CompareUseMI.getOpcode(); if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) continue; unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); Register TrueReg = CompareUseMI.getOperand(1).getReg(); Register FalseReg = CompareUseMI.getOperand(2).getReg(); unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); if (RegToCopy == PPC::NoRegister) continue; // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? 
PPC::LI8 : PPC::LI)); replaceInstrOperandWithImm(CompareUseMI, 1, 0); CompareUseMI.RemoveOperand(3); CompareUseMI.RemoveOperand(2); continue; } LLVM_DEBUG( dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n"); LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump()); LLVM_DEBUG(dbgs() << "Is converted to:\n"); // Convert to copy and remove unneeded operands. CompareUseMI.setDesc(get(PPC::COPY)); CompareUseMI.RemoveOperand(3); CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1); CmpIselsConverted++; Changed = true; LLVM_DEBUG(CompareUseMI.dump()); } if (Changed) return true; // This may end up incremented multiple times since this function is called // during a fixed-point transformation, but it is only meant to indicate the // presence of this opportunity. MissedConvertibleImmediateInstrs++; return false; } // Immediate forms - may simply be convertable to an LI. case PPC::ADDI: case PPC::ADDI8: { // Does the sum fit in a 16-bit signed field? int64_t Addend = MI.getOperand(2).getImm(); if (isInt<16>(Addend + SExtImm)) { ReplaceWithLI = true; Is64BitLI = Opc == PPC::ADDI8; NewImm = Addend + SExtImm; break; } return false; } case PPC::RLDICL: case PPC::RLDICL_rec: case PPC::RLDICL_32: case PPC::RLDICL_32_64: { // Use APInt's rotate function. int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32, SExtImm, true); InVal = InVal.rotl(SH); uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1; InVal &= Mask; // Can't replace negative values with an LI as that will sign-extend // and not clear the left bits. If we're setting the CR bit, we will use // ANDI_rec which won't sign extend, so that's safe. if (isUInt<15>(InVal.getSExtValue()) || (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) { ReplaceWithLI = true; Is64BitLI = Opc != PPC::RLDICL_32; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLDICL_rec; break; } return false; } case PPC::RLWINM: case PPC::RLWINM8: case PPC::RLWINM_rec: case PPC::RLWINM8_rec: { int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); int64_t ME = MI.getOperand(4).getImm(); APInt InVal(32, SExtImm, true); InVal = InVal.rotl(SH); APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB); InVal &= Mask; // Can't replace negative values with an LI as that will sign-extend // and not clear the left bits. If we're setting the CR bit, we will use // ANDI_rec which won't sign extend, so that's safe. bool ValueFits = isUInt<15>(InVal.getSExtValue()); ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) && isUInt<16>(InVal.getSExtValue())); if (ValueFits) { ReplaceWithLI = true; Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec; break; } return false; } case PPC::ORI: case PPC::ORI8: case PPC::XORI: case PPC::XORI8: { int64_t LogicalImm = MI.getOperand(2).getImm(); int64_t Result = 0; if (Opc == PPC::ORI || Opc == PPC::ORI8) Result = LogicalImm | SExtImm; else Result = LogicalImm ^ SExtImm; if (isInt<16>(Result)) { ReplaceWithLI = true; Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8; NewImm = Result; break; } return false; } } if (ReplaceWithLI) { // We need to be careful with CR-setting instructions we're replacing. if (SetCR) { // We don't know anything about uses when we're out of SSA, so only // replace if the new immediate will be reproduced. 
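      // Illustrative example (not from the original code): if the feeding LI
      // loaded 0x0006 and the folded result is NewImm = 0x0004, the post-RA
      // replacement becomes ANDI_rec(0x0006, 0x0004) = 0x0004, which
      // reproduces NewImm, so it is allowed; with NewImm = 0x0001 the AND
      // would yield 0 and change the GPR result, so we must bail out.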
bool ImmChanged = (SExtImm & NewImm) != NewImm; if (PostRA && ImmChanged) return false; if (!PostRA) { // If the defining load-immediate has no other uses, we can just replace // the immediate with the new immediate. if (MRI->hasOneUse(DefMI.getOperand(0).getReg())) DefMI.getOperand(1).setImm(NewImm); // If we're not using the GPR result of the CR-setting instruction, we // just need to and with zero/non-zero depending on the new immediate. else if (MRI->use_empty(MI.getOperand(0).getReg())) { if (NewImm) { assert(Immediate && "Transformation converted zero to non-zero?"); NewImm = Immediate; } } else if (ImmChanged) return false; } } LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); LoadImmediateInfo LII; LII.Imm = NewImm; LII.Is64Bit = Is64BitLI; LII.SetCR = SetCR; // If we're setting the CR, the original load-immediate must be kept (as an // operand to ANDI_rec/ANDI8_rec). if (KilledDef && SetCR) *KilledDef = nullptr; replaceInstrWithLI(MI, LII); // Fixup killed/dead flag after transformation. // Pattern: // ForwardingOperandReg = LI imm1 // y = op2 imm2, ForwardingOperandReg(killed) if (IsForwardingOperandKilled) fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg); LLVM_DEBUG(dbgs() << "With:\n"); LLVM_DEBUG(MI.dump()); return true; } return false; } bool PPCInstrInfo::transformToNewImmFormFedByAdd( MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const { MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI->isSSA(); // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI // for post-ra. if (PostRA) return false; // Only handle load/store. if (!MI.mayLoadOrStore()) return false; unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode()); assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) && "MI must have x-form opcode"); // get Imm Form info. ImmInstrInfo III; bool IsVFReg = MI.getOperand(0).isReg() ? isVFRegister(MI.getOperand(0).getReg()) : false; if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA)) return false; if (!III.IsSummingOperands) return false; if (OpNoForForwarding != III.OpNoForForwarding) return false; MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo); if (!ImmOperandMI.isImm()) return false; // Check DefMI. MachineOperand *ImmMO = nullptr; MachineOperand *RegMO = nullptr; if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) return false; assert(ImmMO && RegMO && "Imm and Reg operand must have been set"); // Check Imm. // Set ImmBase from imm instruction as base and get new Imm inside // isImmElgibleForForwarding. int64_t ImmBase = ImmOperandMI.getImm(); int64_t Imm = 0; if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase)) return false; // Get killed info in case fixup needed after transformation. unsigned ForwardKilledOperandReg = ~0U; if (MI.getOperand(III.OpNoForForwarding).isKill()) ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg(); // Do the transform LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg()); MI.getOperand(III.OpNoForForwarding).setIsKill(RegMO->isKill()); MI.getOperand(III.ImmOpNo).setImm(Imm); // FIXME: fix kill/dead flag if MI and DefMI are not in same basic block. if (DefMI.getParent() == MI.getParent()) { // Check if reg is killed between MI and DefMI. 
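// Illustrative sketch: simplifyToLI above only rewrites an ADDI/ADDI8 fed by
// a load-immediate when the folded sum can still be materialized by a single
// LI/LI8, i.e. when it fits a signed 16-bit field. That check, written as a
// standalone helper assuming llvm/Support/MathExtras.h (names are
// illustrative):
#include "llvm/Support/MathExtras.h"
static bool canFoldAddiOfLI(int64_t LIImm, int64_t Addend, int64_t &NewImm) {
  int64_t Sum = llvm::SignExtend64<16>(LIImm) + Addend;
  if (!llvm::isInt<16>(Sum)) // LI/LI8 sign-extend a 16-bit immediate
    return false;
  NewImm = Sum;
  return true;
}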
auto IsKilledFor = [&](unsigned Reg) { MachineBasicBlock::const_reverse_iterator It = MI; MachineBasicBlock::const_reverse_iterator E = DefMI; It++; for (; It != E; ++It) { if (It->killsRegister(Reg)) return true; } return false; }; // Update kill flag if (RegMO->isKill() || IsKilledFor(RegMO->getReg())) fixupIsDeadOrKill(DefMI, MI, RegMO->getReg()); if (ForwardKilledOperandReg != ~0U) fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); } LLVM_DEBUG(dbgs() << "With:\n"); LLVM_DEBUG(MI.dump()); return true; } // If an X-Form instruction is fed by an add-immediate and one of its operands // is the literal zero, attempt to forward the source of the add-immediate to // the corresponding D-Form instruction with the displacement coming from // the immediate being added. bool PPCInstrInfo::transformToImmFormFedByAdd( MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding, MachineInstr &DefMI, bool KillDefMI) const { // RegMO ImmMO // | | // x = addi reg, imm <----- DefMI // y = op 0 , x <----- MI // | // OpNoForForwarding // Check if the MI meet the requirement described in the III. if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding)) return false; // Check if the DefMI meet the requirement // described in the III. If yes, set the ImmMO and RegMO accordingly. MachineOperand *ImmMO = nullptr; MachineOperand *RegMO = nullptr; if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) return false; assert(ImmMO && RegMO && "Imm and Reg operand must have been set"); // As we get the Imm operand now, we need to check if the ImmMO meet // the requirement described in the III. If yes set the Imm. int64_t Imm = 0; if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm)) return false; bool IsFwdFeederRegKilled = false; // Check if the RegMO can be forwarded to MI. if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI, IsFwdFeederRegKilled)) return false; // Get killed info in case fixup needed after transformation. unsigned ForwardKilledOperandReg = ~0U; MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); if (PostRA && MI.getOperand(OpNoForForwarding).isKill()) ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg(); // We know that, the MI and DefMI both meet the pattern, and // the Imm also meet the requirement with the new Imm-form. // It is safe to do the transformation now. LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); // Update the base reg first. MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(), false, false, RegMO->isKill()); // Then, update the imm. if (ImmMO->isImm()) { // If the ImmMO is Imm, change the operand that has ZERO to that Imm // directly. replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm); } else { // Otherwise, it is Constant Pool Index(CPI) or Global, // which is relocation in fact. We need to replace the special zero // register with ImmMO. // Before that, we need to fixup the target flags for imm. // For some reason, we miss to set the flag for the ImmMO if it is CPI. if (DefMI.getOpcode() == PPC::ADDItocL) ImmMO->setTargetFlags(PPCII::MO_TOC_LO); // MI didn't have the interface such as MI.setOperand(i) though // it has MI.getOperand(i). To repalce the ZERO MachineOperand with // ImmMO, we need to remove ZERO operand and all the operands behind it, // and, add the ImmMO, then, move back all the operands behind ZERO. 
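// Illustrative sketch: the IsKilledFor lambda used by
// transformToNewImmFormFedByAdd above walks backwards from MI to DefMI
// (both in the same basic block) looking for an intervening kill of a
// register before kill flags are fixed up. The same scan as a free function
// (a sketch only; names are illustrative):
static bool isRegKilledBetween(MachineInstr &DefMI, MachineInstr &MI,
                               unsigned Reg) {
  MachineBasicBlock::const_reverse_iterator It = MI, E = DefMI;
  for (++It; It != E; ++It)
    if (It->killsRegister(Reg))
      return true; // Reg is killed strictly between DefMI and MI
  return false;
}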
SmallVector MOps; for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) { MOps.push_back(MI.getOperand(i)); MI.RemoveOperand(i); } // Remove the last MO in the list, which is ZERO operand in fact. MOps.pop_back(); // Add the imm operand. MI.addOperand(*ImmMO); // Now add the rest back. for (auto &MO : MOps) MI.addOperand(MO); } // Update the opcode. MI.setDesc(get(III.ImmOpcode)); // Fix up killed/dead flag after transformation. // Pattern 1: // x = ADD KilledFwdFeederReg, imm // n = opn KilledFwdFeederReg(killed), regn // y = XOP 0, x // Pattern 2: // x = ADD reg(killed), imm // y = XOP 0, x if (IsFwdFeederRegKilled || RegMO->isKill()) fixupIsDeadOrKill(DefMI, MI, RegMO->getReg()); // Pattern 3: // ForwardKilledOperandReg = ADD reg, imm // y = XOP 0, ForwardKilledOperandReg(killed) if (ForwardKilledOperandReg != ~0U) fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); LLVM_DEBUG(dbgs() << "With:\n"); LLVM_DEBUG(MI.dump()); return true; } bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, unsigned ConstantOpNo, MachineInstr &DefMI) const { // DefMI must be LI or LI8. if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || !DefMI.getOperand(1).isImm()) return false; // Get Imm operand and Sign-extend to 64-bits. int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm()); MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); // Exit early if we can't convert this. if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative) return false; if (Imm % III.ImmMustBeMultipleOf) return false; if (III.TruncateImmTo) Imm &= ((1 << III.TruncateImmTo) - 1); if (III.SignedImm) { APInt ActualValue(64, Imm, true); if (!ActualValue.isSignedIntN(III.ImmWidth)) return false; } else { uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; if ((uint64_t)Imm > UnsignedMax) return false; } // If we're post-RA, the instructions don't agree on whether register zero is // special, we can transform this as long as the register operand that will // end up in the location where zero is special isn't R0. if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig : III.ZeroIsSpecialNew + 1; Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); // If R0 is in the operand where zero is special for the new instruction, // it is unsafe to transform if the constant operand isn't that operand. if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && ConstantOpNo != III.ZeroIsSpecialNew) return false; if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) && ConstantOpNo != PosForOrigZero) return false; } // Get killed info in case fixup needed after transformation. 
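// Illustrative sketch: both isImmElgibleForForwarding and
// transformToImmFormFedByLI above validate a candidate immediate against the
// constraints recorded in ImmInstrInfo. Factored into a standalone predicate
// mirroring those checks, assuming llvm/ADT/APInt.h (names are illustrative):
static bool fitsImmForm(int64_t Imm, const ImmInstrInfo &III) {
  if (Imm % III.ImmMustBeMultipleOf) // e.g. DS-form displacements must be a multiple of 4
    return false;
  if (III.TruncateImmTo)
    Imm &= ((1 << III.TruncateImmTo) - 1);
  if (III.SignedImm) // signed field of III.ImmWidth bits
    return llvm::APInt(64, Imm, /*isSigned=*/true).isSignedIntN(III.ImmWidth);
  uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; // unsigned field
  return (uint64_t)Imm <= UnsignedMax;
}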
unsigned ForwardKilledOperandReg = ~0U; if (PostRA && MI.getOperand(ConstantOpNo).isKill()) ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); unsigned Opc = MI.getOpcode(); bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec || Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SLW8 || Opc == PPC::SLW8_rec || Opc == PPC::SRW8 || Opc == PPC::SRW8_rec; bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec || Opc == PPC::SRD || Opc == PPC::SRD_rec; bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec || Opc == PPC::SLD_rec || Opc == PPC::SRD_rec; bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD || Opc == PPC::SRD_rec; MI.setDesc(get(III.ImmOpcode)); if (ConstantOpNo == III.OpNoForForwarding) { // Converting shifts to immediate form is a bit tricky since they may do // one of three things: // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero // 2. If the shift amount is zero, the result is unchanged (save for maybe // setting CR0) // 3. If the shift amount is in [1, OpSize), it's just a shift if (SpecialShift32 || SpecialShift64) { LoadImmediateInfo LII; LII.Imm = 0; LII.SetCR = SetCR; LII.Is64Bit = SpecialShift64; uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F); if (Imm & (SpecialShift32 ? 0x20 : 0x40)) replaceInstrWithLI(MI, LII); // Shifts by zero don't change the value. If we don't need to set CR0, // just convert this to a COPY. Can't do this post-RA since we've already // cleaned up the copies. else if (!SetCR && ShAmt == 0 && !PostRA) { MI.RemoveOperand(2); MI.setDesc(get(PPC::COPY)); } else { // The 32 bit and 64 bit instructions are quite different. if (SpecialShift32) { // Left shifts use (N, 0, 31-N). // Right shifts use (32-N, N, 31) if 0 < N < 32. // use (0, 0, 31) if N == 0. uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { // Left shifts use (N, 63-N). // Right shifts use (64-N, N) if 0 < N < 64. // use (0, 0) if N == 0. uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); } } } else replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); } // Convert commutative instructions (switch the operands and convert the // desired one to an immediate. else if (III.IsCommutative) { replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding); } else llvm_unreachable("Should have exited early!"); // For instructions for which the constant register replaces a different // operand than where the immediate goes, we need to swap them. if (III.OpNoForForwarding != III.ImmOpNo) swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo); // If the special R0/X0 register index are different for original instruction // and new instruction, we need to fix up the register class in new // instruction. if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { if (III.ZeroIsSpecialNew) { // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no // need to fix up register class. 
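// Illustrative sketch: the 32-bit branch above rewrites slw/srw by a known
// constant as rlwinm using the (SH, MB, ME) encodings listed in the comments;
// shift amounts with bit 5 set (>= 32) are handled separately by
// materializing zero. The same formulas as a small helper (names are
// illustrative):
struct RLWINMEncoding { uint64_t SH, MB, ME; };
static RLWINMEncoding encodeShiftAsRLWINM(uint64_t ShAmt, bool RightShift) {
  RLWINMEncoding Enc;
  Enc.SH = ShAmt == 0 ? 0 : (RightShift ? 32 - ShAmt : ShAmt);
  Enc.MB = RightShift ? ShAmt : 0;
  Enc.ME = RightShift ? 31 : 31 - ShAmt;
  return Enc;
}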
Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); if (Register::isVirtualRegister(RegToModify)) { const TargetRegisterClass *NewRC = MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; MRI.setRegClass(RegToModify, NewRC); } } } // Fix up killed/dead flag after transformation. // Pattern: // ForwardKilledOperandReg = LI imm // y = XOP reg, ForwardKilledOperandReg(killed) if (ForwardKilledOperandReg != ~0U) fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); return true; } const TargetRegisterClass * PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) return &PPC::VSRCRegClass; return RC; } int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { return PPC::getRecordFormOpcode(Opcode); } // This function returns true if the machine instruction // always outputs a value by sign-extending a 32 bit value, // i.e. 0 to 31-th bits are same as 32-th bit. static bool isSignExtendingOp(const MachineInstr &MI) { int Opcode = MI.getOpcode(); if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAW_rec || Opcode == PPC::SRAWI || Opcode == PPC::SRAWI_rec || Opcode == PPC::LWA || Opcode == PPC::LWAX || Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || Opcode == PPC::LHA || Opcode == PPC::LHAX || Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB || Opcode == PPC::EXTSB_rec || Opcode == PPC::EXTSH || Opcode == PPC::EXTSH_rec || Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_rec || Opcode == PPC::SETB || Opcode == PPC::SETB8 || Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || Opcode == PPC::EXTSB8_32_64) return true; if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) return true; if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) && MI.getOperand(3).getImm() > 0 && MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) return true; return false; } // This function returns true if the machine instruction // always outputs zeros in higher 32 bits. static bool isZeroExtendingOp(const MachineInstr &MI) { int Opcode = MI.getOpcode(); // The 16-bit immediate is sign-extended in li/lis. // If the most significant bit is zero, all higher bits are zero. if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || Opcode == PPC::LIS8) { int64_t Imm = MI.getOperand(1).getImm(); if (((uint64_t)Imm & ~0x7FFFuLL) == 0) return true; } // We have some variations of rotate-and-mask instructions // that clear higher 32-bits. 
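// Illustrative note and sketch: rationale for the RLWINM classification in
// isSignExtendingOp above and in the isZeroExtendingOp checks that follow.
// With PowerPC big-endian bit numbering, a non-wrapping mask (MB <= ME) only
// keeps bits of the low 32-bit word, so the upper 32 bits of the 64-bit
// result are zero; if in addition MB > 0, the word's own sign bit is cleared,
// so the result is also sign-extended. Expressed as tiny helpers (not used by
// the code, for illustration only):
static bool rlwinmMaskZeroExtends(unsigned MB, unsigned ME) { return MB <= ME; }
static bool rlwinmMaskSignExtends(unsigned MB, unsigned ME) {
  return MB > 0 && MB <= ME;
}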
if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec || Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec || Opcode == PPC::RLDICL_32_64) && MI.getOperand(3).getImm() >= 32) return true; if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) && MI.getOperand(3).getImm() >= 32 && MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) return true; if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec || Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) return true; // There are other instructions that clear higher 32-bits. if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec || Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec || Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec || Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec || Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || Opcode == PPC::SLW || Opcode == PPC::SLW_rec || Opcode == PPC::SRW || Opcode == PPC::SRW_rec || Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || Opcode == PPC::SLWI || Opcode == PPC::SLWI_rec || Opcode == PPC::SRWI || Opcode == PPC::SRWI_rec || Opcode == PPC::LWZ || Opcode == PPC::LWZX || Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX || Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ || Opcode == PPC::LBZX || Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || Opcode == PPC::ANDI_rec || Opcode == PPC::ANDIS_rec || Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWI_rec || Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWI_rec || Opcode == PPC::MFVSRWZ) return true; return false; } // This function returns true if the input MachineInstr is a TOC save // instruction. bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg()) return false; unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); unsigned StackOffset = MI.getOperand(1).getImm(); Register StackReg = MI.getOperand(2).getReg(); if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset) return true; return false; } // We limit the max depth to track incoming values of PHIs or binary ops // (e.g. AND) to avoid excessive cost. const unsigned MAX_DEPTH = 1; bool PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, const unsigned Depth) const { const MachineFunction *MF = MI.getParent()->getParent(); const MachineRegisterInfo *MRI = &MF->getRegInfo(); // If we know this instruction returns sign- or zero-extended result, // return true. if (SignExt ? isSignExtendingOp(MI): isZeroExtendingOp(MI)) return true; switch (MI.getOpcode()) { case PPC::COPY: { Register SrcReg = MI.getOperand(1).getReg(); // In both ELFv1 and v2 ABI, method parameters and the return value // are sign- or zero-extended. if (MF->getSubtarget().isSVR4ABI()) { const PPCFunctionInfo *FuncInfo = MF->getInfo(); // We check the ZExt/SExt flags for a method parameter. 
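// Illustrative sketch: the opcode classifications above feed
// isSignOrZeroExtended(), whose natural consumer is a peephole that drops
// redundant extensions, e.g. replacing an EXTSW whose input is already
// sign-extended with a plain COPY. A hypothetical caller; the names and
// structure below are illustrative only and not taken from an existing pass:
static bool isRedundantExtsw(const MachineInstr &ExtSW,
                             const PPCInstrInfo &TII,
                             const MachineRegisterInfo &MRI) {
  if (ExtSW.getOpcode() != PPC::EXTSW &&
      ExtSW.getOpcode() != PPC::EXTSW_32_64)
    return false;
  Register Src = ExtSW.getOperand(1).getReg();
  if (!Register::isVirtualRegister(Src))
    return false;
  const MachineInstr *Def = MRI.getVRegDef(Src);
  return Def && TII.isSignExtended(*Def); // input already sign-extended
}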
if (MI.getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock()) { Register VReg = MI.getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? FuncInfo->isLiveInSExt(VReg) : FuncInfo->isLiveInZExt(VReg); } // For a method return value, we check the ZExt/SExt flags in attribute. // We assume the following code sequence for method call. // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1 // BL8_NOP @func,... // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1 // %5 = COPY %x3; G8RC:%5 if (SrcReg == PPC::X3) { const MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::const_instr_iterator II = MachineBasicBlock::const_instr_iterator(&MI); if (II != MBB->instr_begin() && (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { const MachineInstr &CallMI = *(--II); if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) { const Function *CalleeFn = dyn_cast(CallMI.getOperand(0).getGlobal()); if (!CalleeFn) return false; const IntegerType *IntTy = dyn_cast(CalleeFn->getReturnType()); const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttributes(); if (IntTy && IntTy->getBitWidth() <= 32) return Attrs.hasAttribute(SignExt ? Attribute::SExt : Attribute::ZExt); } } } } // If this is a copy from another register, we recursively check source. if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) return isSignOrZeroExtended(*SrcMI, SignExt, Depth); return false; } case PPC::ANDI_rec: case PPC::ANDIS_rec: case PPC::ORI: case PPC::ORIS: case PPC::XORI: case PPC::XORIS: case PPC::ANDI8_rec: case PPC::ANDIS8_rec: case PPC::ORI8: case PPC::ORIS8: case PPC::XORI8: case PPC::XORIS8: { // logical operation with 16-bit immediate does not change the upper bits. // So, we track the operand register as we do for register copy. Register SrcReg = MI.getOperand(1).getReg(); if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) return isSignOrZeroExtended(*SrcMI, SignExt, Depth); return false; } // If all incoming values are sign-/zero-extended, // the output of OR, ISEL or PHI is also sign-/zero-extended. case PPC::OR: case PPC::OR8: case PPC::ISEL: case PPC::PHI: { if (Depth >= MAX_DEPTH) return false; // The input registers for PHI are operand 1, 3, ... // The input registers for others are operand 1 and 2. unsigned E = 3, D = 1; if (MI.getOpcode() == PPC::PHI) { E = MI.getNumOperands(); D = 2; } for (unsigned I = 1; I != E; I += D) { if (MI.getOperand(I).isReg()) { Register SrcReg = MI.getOperand(I).getReg(); if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) return false; } else return false; } return true; } // If at least one of the incoming values of an AND is zero extended // then the output is also zero-extended. If both of the incoming values // are sign-extended then the output is also sign extended. 
case PPC::AND: case PPC::AND8: { if (Depth >= MAX_DEPTH) return false; assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg()); Register SrcReg1 = MI.getOperand(1).getReg(); Register SrcReg2 = MI.getOperand(2).getReg(); if (!Register::isVirtualRegister(SrcReg1) || !Register::isVirtualRegister(SrcReg2)) return false; const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1); const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2); if (!MISrc1 || !MISrc2) return false; if(SignExt) return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) && isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); else return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) || isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); } default: break; } return false; } bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); } namespace { class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { MachineInstr *Loop, *EndLoop, *LoopCount; MachineFunction *MF; const TargetInstrInfo *TII; int64_t TripCount; public: PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop, MachineInstr *LoopCount) : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount), MF(Loop->getParent()->getParent()), TII(MF->getSubtarget().getInstrInfo()) { // Inspect the Loop instruction up-front, as it may be deleted when we call // createTripCountGreaterCondition. if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) TripCount = LoopCount->getOperand(1).getImm(); else TripCount = -1; } bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { // Only ignore the terminator. return MI == EndLoop; } Optional createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond) override { if (TripCount == -1) { // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, // so we don't need to generate any thing here. Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(MachineOperand::CreateReg( MF->getSubtarget().isPPC64() ? PPC::CTR8 : PPC::CTR, true)); return {}; } return TripCount > TC; } void setPreheader(MachineBasicBlock *NewPreheader) override { // Do nothing. We want the LOOP setup instruction to stay in the *old* // preheader, so we can use BDZ in the prologs to adapt the loop trip count. } void adjustTripCount(int TripCountAdjust) override { // If the loop trip count is a compile-time value, then just change the // value. if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) { int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust; LoopCount->getOperand(1).setImm(TripCount); return; } // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, // so we don't need to generate any thing here. } void disposed() override { Loop->eraseFromParent(); // Ensure the loop setup instruction is deleted too. LoopCount->eraseFromParent(); } }; } // namespace std::unique_ptr PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { // We really "analyze" only hardware loops right now. 
MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); MachineBasicBlock *Preheader = *LoopBB->pred_begin(); if (Preheader == LoopBB) Preheader = *std::next(LoopBB->pred_begin()); MachineFunction *MF = Preheader->getParent(); if (I != LoopBB->end() && isBDNZ(I->getOpcode())) { SmallPtrSet Visited; if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) { Register LoopCountReg = LoopInst->getOperand(0).getReg(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); return std::make_unique(LoopInst, &*I, LoopCount); } } return nullptr; } MachineInstr *PPCInstrInfo::findLoopInstr( MachineBasicBlock &PreHeader, SmallPtrSet &Visited) const { unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop); // The loop set-up instruction should be in preheader for (auto &I : PreHeader.instrs()) if (I.getOpcode() == LOOPi) return &I; return nullptr; } // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. bool PPCInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() != 3) return false; if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg()) return false; if (!LdSt.hasOneMemOperand()) return false; Width = (*LdSt.memoperands_begin())->getSize(); Offset = LdSt.getOperand(1).getImm(); BaseReg = &LdSt.getOperand(2); return true; } bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; // Retrieve the base register, offset from the base register and width. Width // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If // base registers are identical, and the offset of a lower memory access + // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; if (LowOffset + LowWidth <= HighOffset) return true; } } return false; } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h index d98597f48340..43973c627fcf 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -1,641 +1,645 @@ //===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the PowerPC implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H #include "PPCRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" namespace llvm { /// PPCII - This namespace holds all of the PowerPC target-specific /// per-instruction flags. These must match the corresponding definitions in /// PPC.td and PPCInstrFormats.td. namespace PPCII { enum { // PPC970 Instruction Flags. These flags describe the characteristics of the // PowerPC 970 (aka G5) dispatch groups and how they are formed out of // raw machine instructions. /// PPC970_First - This instruction starts a new dispatch group, so it will /// always be the first one in the group. PPC970_First = 0x1, /// PPC970_Single - This instruction starts a new dispatch group and /// terminates it, so it will be the sole instruction in the group. PPC970_Single = 0x2, /// PPC970_Cracked - This instruction is cracked into two pieces, requiring /// two dispatch pipes to be available to issue. PPC970_Cracked = 0x4, /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that /// an instruction is issued to. PPC970_Shift = 3, PPC970_Mask = 0x07 << PPC970_Shift }; enum PPC970_Unit { /// These are the various PPC970 execution unit pipelines. Each instruction /// is one of these. PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit PPC970_VALU = 5 << PPC970_Shift, // Vector ALU PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit PPC970_BRU = 7 << PPC970_Shift // Branch Unit }; enum { /// Shift count to bypass PPC970 flags NewDef_Shift = 6, /// This instruction is an X-Form memory operation. XFormMemOp = 0x1 << NewDef_Shift, /// This instruction is prefixed. Prefixed = 0x1 << (NewDef_Shift+1) }; } // end namespace PPCII // Instructions that have an immediate form might be convertible to that // form if the correct input is a result of a load immediate. In order to // know whether the transformation is special, we might need to know some // of the details of the two forms. struct ImmInstrInfo { // Is the immediate field in the immediate form signed or unsigned? uint64_t SignedImm : 1; // Does the immediate need to be a multiple of some value? uint64_t ImmMustBeMultipleOf : 5; // Is R0/X0 treated specially by the original r+r instruction? // If so, in which operand? uint64_t ZeroIsSpecialOrig : 3; // Is R0/X0 treated specially by the new r+i instruction? // If so, in which operand? uint64_t ZeroIsSpecialNew : 3; // Is the operation commutative? uint64_t IsCommutative : 1; // The operand number to check for add-immediate def. uint64_t OpNoForForwarding : 3; // The operand number for the immediate. uint64_t ImmOpNo : 3; // The opcode of the new instruction. uint64_t ImmOpcode : 16; // The size of the immediate. uint64_t ImmWidth : 5; // The immediate should be truncated to N bits. 
uint64_t TruncateImmTo : 5; // Is the instruction summing the operand uint64_t IsSummingOperands : 1; }; // Information required to convert an instruction to just a materialized // immediate. struct LoadImmediateInfo { unsigned Imm : 16; unsigned Is64Bit : 1; unsigned SetCR : 1; }; // Index into the OpcodesForSpill array. enum SpillOpcodeKey { SOK_Int4Spill, SOK_Int8Spill, SOK_Float8Spill, SOK_Float4Spill, SOK_CRSpill, SOK_CRBitSpill, SOK_VRVectorSpill, SOK_VSXVectorSpill, SOK_VectorFloat8Spill, SOK_VectorFloat4Spill, SOK_VRSaveSpill, SOK_QuadFloat8Spill, SOK_QuadFloat4Spill, SOK_QuadBitSpill, SOK_SpillToVSR, SOK_SPESpill, SOK_LastOpcodeSpill // This must be last on the enum. }; // Define list of load and store spill opcodes. #define Pwr8LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \ PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \ PPC::SPILLTOVSR_LD, PPC::EVLDD \ } #define Pwr9LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \ PPC::QVLFDXb, PPC::SPILLTOVSR_LD \ } #define Pwr8StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \ PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \ PPC::EVSTDD \ } #define Pwr9StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \ PPC::SPILLTOVSR_ST \ } // Initialize arrays for load and store spill opcodes on supported subtargets. #define StoreOpcodesForSpill \ { Pwr8StoreOpcodes, Pwr9StoreOpcodes } #define LoadOpcodesForSpill \ { Pwr8LoadOpcodes, Pwr9LoadOpcodes } class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; const PPCRegisterInfo RI; const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] = StoreOpcodesForSpill; const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] = LoadOpcodesForSpill; void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const; void LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const; // Replace the instruction with single LI if possible. \p DefMI must be LI or // LI8. bool simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding, MachineInstr **KilledDef) const; // If the inst is imm-form and its register operand is produced by a ADDI, put // the imm into the inst directly and remove the ADDI if possible. bool transformToNewImmFormFedByAdd(MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const; // If the inst is x-form and has imm-form and one of its operand is produced // by a LI, put the imm into the inst directly and remove the LI if possible. bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, unsigned ConstantOpNo, MachineInstr &DefMI) const; // If the inst is x-form and has imm-form and one of its operand is produced // by an add-immediate, try to transform it when possible. 
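// Illustrative sketch: how an ImmInstrInfo record above might be filled in
// for an indexed (X-form) load that has a displacement (D-form) twin. All of
// the concrete numbers below are hypothetical, chosen only to show what each
// bit-field expresses; the real tables live in instrHasImmForm().
static void fillHypotheticalXToDFormInfo(ImmInstrInfo &III) {
  III.SignedImm = 1;           // the D-form displacement is signed...
  III.ImmWidth = 16;           // ...and 16 bits wide
  III.ImmMustBeMultipleOf = 1; // no alignment requirement on the displacement
  III.TruncateImmTo = 0;       // no truncation of the forwarded immediate
  III.IsSummingOperands = 1;   // base register and displacement are summed
  III.IsCommutative = 1;       // RA/RB of the indexed form may be swapped
  III.OpNoForForwarding = 2;   // operand that may be fed by an ADDI (hypothetical)
  III.ImmOpNo = 1;             // where the displacement lands in the D-form (hypothetical)
  III.ZeroIsSpecialOrig = 1;   // operand where R0 reads as zero in the X-form (hypothetical)
  III.ZeroIsSpecialNew = 2;    // ...and in the D-form (hypothetical)
  III.ImmOpcode = PPC::LWZ;    // e.g. an LWZX -> LWZ mapping
}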
bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III, unsigned ConstantOpNo, MachineInstr &DefMI, bool KillDefMI) const; // Try to find that, if the instruction 'MI' contains any operand that // could be forwarded from some inst that feeds it. If yes, return the // Def of that operand. And OpNoForForwarding is the operand index in // the 'MI' for that 'Def'. If we see another use of this Def between // the Def and the MI, SeenIntermediateUse becomes 'true'. MachineInstr *getForwardingDefMI(MachineInstr &MI, unsigned &OpNoForForwarding, bool &SeenIntermediateUse) const; // Can the user MI have it's source at index \p OpNoForForwarding // forwarded from an add-immediate that feeds it? bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding) const; bool isDefMIElgibleForForwarding(MachineInstr &DefMI, const ImmInstrInfo &III, MachineOperand *&ImmMO, MachineOperand *&RegMO) const; bool isImmElgibleForForwarding(const MachineOperand &ImmMO, const MachineInstr &DefMI, const ImmInstrInfo &III, int64_t &Imm, int64_t BaseImm = 0) const; bool isRegElgibleForForwarding(const MachineOperand &RegMO, const MachineInstr &DefMI, const MachineInstr &MI, bool KillDefMI, bool &IsFwdFeederRegKilled) const; unsigned getSpillTarget() const; const unsigned *getStoreOpcodesForSpillArray() const; const unsigned *getLoadOpcodesForSpillArray() const; int16_t getFMAOpIdxInfo(unsigned Opcode) const; void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const; virtual void anchor(); protected: /// Commutes the operands in the given instruction. /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. /// /// Do not call this method for a non-commutable instruction or for /// non-commutable pair of operand indices OpIdx1 and OpIdx2. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. /// /// For example, we can commute rlwimi instructions, but only if the /// rotate amt is zero. We also have to munge the immediates a bit. MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; public: explicit PPCInstrInfo(PPCSubtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
/// const PPCRegisterInfo &getRegisterInfo() const { return RI; } bool isXFormMemOp(unsigned Opcode) const { return get(Opcode).TSFlags & PPCII::XFormMemOp; } bool isPrefixed(unsigned Opcode) const { return get(Opcode).TSFlags & PPCII::Prefixed; } static bool isSameClassPhysRegCopy(unsigned Opcode) { unsigned CopyOpcodes[] = { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf, PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb, PPC::CROR, PPC::EVOR, -1U }; for (int i = 0; CopyOpcodes[i] != -1U; i++) if (Opcode == CopyOpcodes[i]) return true; return false; } ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost = nullptr) const override; int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override; int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const override { return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, UseNode, UseIdx); } bool hasLowDefLatency(const TargetSchedModel &SchedModel, const MachineInstr &DefMI, unsigned DefIdx) const override { // Machine LICM should hoist all instructions in low-register-pressure // situations; none are sufficiently free to justify leaving in a loop // body. return false; } bool useMachineCombiner() const override { return true; } /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence void genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; /// Return true when there is potentially a faster code sequence for a fma /// chain ending in \p Root. All potential patterns are output in the \p /// P array. bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl &P) const; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in . All potential patterns are /// output in the array. bool getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl &P) const override; bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; /// On PowerPC, we try to reassociate FMA chain which will increase /// instruction size. Set extension resource length limit to 1 for edge case. /// Resource Length is calculated by scaled resource usage in getCycles(). /// Because of the division in getCycles(), it returns different cycles due to /// legacy scaled resource usage. So new resource length may be same with /// legacy or 1 bigger than legacy. /// We need to execlude the 1 bigger case even the resource length is not /// perserved for more FMA chain reassociations on PowerPC. 
int getExtendResourceLenLimit() const override { return 1; } void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override; void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override; bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; // Branch analysis. bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded = nullptr) const override; // Select analysis. bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, Register, Register, Register, int &, int &, int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef Cond, Register TrueReg, Register FalseReg) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; // Emits a register spill without updating the register class for vector // registers. This ensures that when we spill a vector register the // element order in the register is the same as it was in memory. void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; // Emits a register reload without updating the register class for vector // registers. This ensures that when we reload a vector register the // element order in the register is the same as it was in memory. void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const; unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override; bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const; // If conversion by predication (only supported by some branch instructions). 
// All of the profitability checks always return true; it is always // profitable to use the predicated branches. bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override { return true; } bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, BranchProbability Probability) const override; bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, BranchProbability Probability) const override { return true; } bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override { return false; } // Predication support. bool isPredicated(const MachineInstr &MI) const override; bool PredicateInstruction(MachineInstr &MI, ArrayRef Pred) const override; bool SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const override; bool DefinesPredicate(MachineInstr &MI, std::vector &Pred) const override; // Comparison optimization. bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int &Mask, int &Value) const override; bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const override; /// Return true if get the base operand, byte offset of an instruction and /// the memory width. Width is the size of memory that is being /// loaded/stored (e.g. 1, 2, 4, 8). bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; /// Return true if two MIs access different memory addresses and false /// otherwise bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; void getNoop(MCInst &NopInst) const override; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; ArrayRef> getSerializableDirectMachineOperandTargetFlags() const override; ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const override; // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. bool expandVSXMemPseudo(MachineInstr &MI) const; // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; static bool isVFRegister(unsigned Reg) { return Reg >= PPC::VF0 && Reg <= PPC::VF31; } static bool isVRRegister(unsigned Reg) { return Reg >= PPC::V0 && Reg <= PPC::V31; } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; static int getRecordFormOpcode(unsigned Opcode); bool isTOCSaveMI(const MachineInstr &MI) const; bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, const unsigned PhiDepth) const; /// Return true if the output of the instruction is always a sign-extended, /// i.e. 0 to 31-th bits are same as 32-th bit. bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const { return isSignOrZeroExtended(MI, true, depth); } /// Return true if the output of the instruction is always zero-extended, /// i.e. 
0 to 31-th bits are all zeros bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { return isSignOrZeroExtended(MI, false, depth); } bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; bool foldFrameOffset(MachineInstr &MI) const; bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const; bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const; /// Fixup killed/dead flag for register \p RegNo between instructions [\p - /// StartMI, \p EndMI]. Some PostRA transformations may violate register - /// killed/dead flags semantics, this function can be called to fix up. Before - /// calling this function, + /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate + /// register killed/dead flags semantics, this function can be called to fix + /// up. Before calling this function, /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI. /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI) /// and possible definition for \p RegNo is \p StartMI or \p EndMI. - /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same - /// basic block. + /// 3. We can do accurate fixup for the case when all instructions between + /// [\p StartMI, \p EndMI] are in same basic block. + /// 4. For the case when \p StartMI and \p EndMI are not in same basic block, + /// we conservatively clear kill flag for all uses of \p RegNo for pre-RA + /// and for post-RA, we give an assertion as without reaching definition + /// analysis post-RA, \p StartMI and \p EndMI are hard to keep right. void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, unsigned RegNo) const; void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const; bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const; // In PostRA phase, try to find instruction defines \p Reg before \p MI. // \p SeenIntermediate is set to true if uses between DefMI and \p MI exist. MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const; /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual /// register number needed for the opcode/operand number combination. /// The operand number argument will be useful when we need to extend this /// to instructions that use both Altivec and VSX numbering (for different /// operands). 
static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg, unsigned OpNo) { int16_t regClass = Desc.OpInfo[OpNo].RegClass; switch (regClass) { // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31, // VSX32-VSX63 during encoding/disassembling case PPC::VSSRCRegClassID: case PPC::VSFRCRegClassID: if (isVFRegister(Reg)) return PPC::VSX32 + (Reg - PPC::VF0); break; // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31, // VSX32-VSX63 during encoding/disassembling case PPC::VSRCRegClassID: if (isVRRegister(Reg)) return PPC::VSX32 + (Reg - PPC::V0); break; // Other RegClass doesn't need mapping default: break; } return Reg; } /// Check \p Opcode is BDNZ (Decrement CTR and branch if it is still nonzero). bool isBDNZ(unsigned Opcode) const; /// Find the hardware loop instruction used to set-up the specified loop. /// On PPC, we have two instructions used to set-up the hardware loop /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) /// instructions to indicate the end of a loop. MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet &Visited) const; /// Analyze loop L, which must be a single-basic-block loop, and if the /// conditions can be understood enough produce a PipelinerLoopInfo object. std::unique_ptr analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; }; } #endif diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp index 8bf7ef2deb71..775693253db2 100644 --- a/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp +++ b/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp @@ -1,3945 +1,3946 @@ /* * kmp_lock.cpp -- lock-related functions */ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include #include #include "kmp.h" #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_itt.h" #include "kmp_lock.h" #include "kmp_wait_release.h" #include "kmp_wrapper_getpid.h" #include "tsan_annotations.h" #if KMP_USE_FUTEX #include #include // We should really include , but that causes compatibility problems on // different Linux* OS distributions that either require that you include (or // break when you try to include) . Since all we need is the two // macros below (which are part of the kernel ABI, so can't change) we just // define the constants here and don't include #ifndef FUTEX_WAIT #define FUTEX_WAIT 0 #endif #ifndef FUTEX_WAKE #define FUTEX_WAKE 1 #endif #endif /* Implement spin locks for internal library use. */ /* The algorithm implemented is Lamport's bakery lock [1974]. */ void __kmp_validate_locks(void) { int i; kmp_uint32 x, y; /* Check to make sure unsigned arithmetic does wraps properly */ x = ~((kmp_uint32)0) - 2; y = x - 2; for (i = 0; i < 8; ++i, ++x, ++y) { kmp_uint32 z = (x - y); KMP_ASSERT(z == 2); } KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0); } /* ------------------------------------------------------------------------ */ /* test and set locks */ // For the non-nested locks, we can only assume that the first 4 bytes were // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel // compiler only allocates a 4 byte pointer on IA-32 architecture. 
On // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. // // gcc reserves >= 8 bytes for nested locks, so we can assume that the // entire 8 bytes were allocated for nested locks on all 64-bit platforms. static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) { return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1; } static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) { return lck->lk.depth_locked != -1; } __forceinline static int __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) { KMP_MB(); #ifdef USE_LOCK_PROFILE kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll); if ((curr != 0) && (curr != gtid + 1)) __kmp_printf("LOCK CONTENTION: %p\n", lck); /* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ kmp_int32 tas_free = KMP_LOCK_FREE(tas); kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { KMP_FSYNC_ACQUIRED(lck); return KMP_LOCK_ACQUIRED_FIRST; } kmp_uint32 spins; KMP_FSYNC_PREPARE(lck); KMP_INIT_YIELD(spins); kmp_backoff_t backoff = __kmp_spin_backoff_params; do { __kmp_spin_backoff(&backoff); KMP_YIELD_OVERSUB_ELSE_SPIN(spins); } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free || !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)); KMP_FSYNC_ACQUIRED(lck); return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); ANNOTATE_TAS_ACQUIRED(lck); return retval; } static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { KMP_FATAL(LockIsAlreadyOwned, func); } return __kmp_acquire_tas_lock(lck, gtid); } int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { kmp_int32 tas_free = KMP_LOCK_FREE(tas); kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { KMP_FSYNC_ACQUIRED(lck); return TRUE; } return FALSE; } static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } return __kmp_test_tas_lock(lck, gtid); } int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { KMP_MB(); /* Flush all pending memory write invalidates. */ KMP_FSYNC_RELEASING(lck); ANNOTATE_TAS_RELEASED(lck); KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas)); KMP_MB(); /* Flush all pending memory write invalidates. 
*/ KMP_YIELD_OVERSUB(); return KMP_LOCK_RELEASED; } static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; KMP_MB(); /* in case another processor initialized lock */ if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_tas_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && (__kmp_get_tas_lock_owner(lck) != gtid)) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_tas_lock(lck, gtid); } void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = KMP_LOCK_FREE(tas); } void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { char const *const func = "omp_destroy_lock"; if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_tas_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_tas_lock(lck); } // nested test and set locks int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_tas_lock_owner(lck) == gtid) { lck->lk.depth_locked += 1; return KMP_LOCK_ACQUIRED_NEXT; } else { __kmp_acquire_tas_lock_timed_template(lck, gtid); ANNOTATE_TAS_ACQUIRED(lck); lck->lk.depth_locked = 1; return KMP_LOCK_ACQUIRED_FIRST; } } static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_nest_lock"; if (!__kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_acquire_nested_tas_lock(lck, gtid); } int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { int retval; KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_tas_lock_owner(lck) == gtid) { retval = ++lck->lk.depth_locked; } else if (!__kmp_test_tas_lock(lck, gtid)) { retval = 0; } else { KMP_MB(); retval = lck->lk.depth_locked = 1; } return retval; } static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_nest_lock"; if (!__kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_test_nested_tas_lock(lck, gtid); } int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); KMP_MB(); if (--(lck->lk.depth_locked) == 0) { __kmp_release_tas_lock(lck, gtid); return KMP_LOCK_RELEASED; } return KMP_LOCK_STILL_HELD; } static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_nest_lock"; KMP_MB(); /* in case another processor initialized lock */ if (!__kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_tas_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_tas_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_nested_tas_lock(lck, gtid); } void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { __kmp_init_tas_lock(lck); lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { __kmp_destroy_tas_lock(lck); lck->lk.depth_locked = 0; } static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { char const *const func = "omp_destroy_nest_lock"; if 
(!__kmp_is_tas_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_tas_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_nested_tas_lock(lck); } #if KMP_USE_FUTEX /* ------------------------------------------------------------------------ */ /* futex locks */ // futex locks are really just test and set locks, with a different method // of handling contention. They take the same amount of space as test and // set locks, and are allocated the same way (i.e. use the area allocated by // the compiler for non-nested locks / allocate nested locks on the heap). static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; } static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { return lck->lk.depth_locked != -1; } __forceinline static int __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { kmp_int32 gtid_code = (gtid + 1) << 1; KMP_MB(); #ifdef USE_LOCK_PROFILE kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); if ((curr != 0) && (curr != gtid_code)) __kmp_printf("LOCK CONTENTION: %p\n", lck); /* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ KMP_FSYNC_PREPARE(lck); KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", lck, lck->lk.poll, gtid)); kmp_int32 poll_val; while ((poll_val = KMP_COMPARE_AND_STORE_RET32( &(lck->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", lck, gtid, poll_val, cond)); // NOTE: if you try to use the following condition for this branch // // if ( poll_val & 1 == 0 ) // // Then the 12.0 compiler has a bug where the following block will // always be skipped, regardless of the value of the LSB of poll_val. if (!cond) { // Try to set the lsb in the poll to indicate to the owner // thread that they need to wake this thread up. if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", lck, lck->lk.poll, gtid)); continue; } poll_val |= KMP_LOCK_BUSY(1, futex); KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, lck->lk.poll, gtid)); } KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", lck, gtid, poll_val)); kmp_int32 rc; if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " "failed (rc=%d errno=%d)\n", lck, gtid, poll_val, rc, errno)); continue; } KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", lck, gtid, poll_val)); // This thread has now done a successful futex wait call and was entered on // the OS futex queue. We must now perform a futex wake call when releasing // the lock, as we have no idea how many other threads are in the queue. 
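// ---------------------------------------------------------------------------
// Illustrative sketch: the futex lock here keeps a "someone may be sleeping"
// bit so the releaser knows whether a FUTEX_WAKE is needed. A minimal
// Linux-only version of that wait/wake contract, with states 0 = free,
// 1 = locked, 2 = locked with possible waiters. The toy_* names are
// hypothetical and not part of the runtime.
#include <atomic>
#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

struct toy_futex_lock {
  std::atomic<int> state{0};
};

static inline void toy_futex_acquire(toy_futex_lock *l) {
  int c = 0;
  // Fast path: 0 -> 1 (locked, no waiters).
  if (l->state.compare_exchange_strong(c, 1, std::memory_order_acquire))
    return;
  // Slow path: advertise waiters by forcing the state to 2, then sleep. The
  // kernel re-checks that the word still equals 2 before blocking, so a
  // release that races with us simply makes FUTEX_WAIT return immediately.
  if (c != 2)
    c = l->state.exchange(2, std::memory_order_acquire);
  while (c != 0) {
    syscall(SYS_futex, static_cast<void *>(&l->state), FUTEX_WAIT, 2, nullptr,
            nullptr, 0);
    c = l->state.exchange(2, std::memory_order_acquire);
  }
}

static inline void toy_futex_release(toy_futex_lock *l) {
  // Only issue the (relatively expensive) wake syscall if a waiter may exist.
  if (l->state.exchange(0, std::memory_order_release) == 2)
    syscall(SYS_futex, static_cast<void *>(&l->state), FUTEX_WAKE, 1, nullptr,
            nullptr, 0);
}
// ---------------------------------------------------------------------------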
gtid_code |= 1; } KMP_FSYNC_ACQUIRED(lck); KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, lck->lk.poll, gtid)); return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); ANNOTATE_FUTEX_ACQUIRED(lck); return retval; } static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { KMP_FATAL(LockIsAlreadyOwned, func); } return __kmp_acquire_futex_lock(lck, gtid); } int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { KMP_FSYNC_ACQUIRED(lck); return TRUE; } return FALSE; } static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } return __kmp_test_futex_lock(lck, gtid); } int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { KMP_MB(); /* Flush all pending memory write invalidates. */ KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", lck, lck->lk.poll, gtid)); KMP_FSYNC_RELEASING(lck); ANNOTATE_FUTEX_RELEASED(lck); kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", lck, gtid, poll_val)); if (KMP_LOCK_STRIP(poll_val) & 1) { KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", lck, gtid)); syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); } KMP_MB(); /* Flush all pending memory write invalidates. 
*/ KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, lck->lk.poll, gtid)); KMP_YIELD_OVERSUB(); return KMP_LOCK_RELEASED; } static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; KMP_MB(); /* in case another processor initialized lock */ if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_futex_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && (__kmp_get_futex_lock_owner(lck) != gtid)) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_futex_lock(lck, gtid); } void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); } void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { char const *const func = "omp_destroy_lock"; if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && __kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_futex_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_futex_lock(lck); } // nested futex locks int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_futex_lock_owner(lck) == gtid) { lck->lk.depth_locked += 1; return KMP_LOCK_ACQUIRED_NEXT; } else { __kmp_acquire_futex_lock_timed_template(lck, gtid); ANNOTATE_FUTEX_ACQUIRED(lck); lck->lk.depth_locked = 1; return KMP_LOCK_ACQUIRED_FIRST; } } static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_nest_lock"; if (!__kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_acquire_nested_futex_lock(lck, gtid); } int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { int retval; KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_futex_lock_owner(lck) == gtid) { retval = ++lck->lk.depth_locked; } else if (!__kmp_test_futex_lock(lck, gtid)) { retval = 0; } else { KMP_MB(); retval = lck->lk.depth_locked = 1; } return retval; } static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_nest_lock"; if (!__kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_test_nested_futex_lock(lck, gtid); } int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); KMP_MB(); if (--(lck->lk.depth_locked) == 0) { __kmp_release_futex_lock(lck, gtid); return KMP_LOCK_RELEASED; } return KMP_LOCK_STILL_HELD; } static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_nest_lock"; KMP_MB(); /* in case another processor initialized lock */ if (!__kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_futex_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_futex_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_nested_futex_lock(lck, gtid); } void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { __kmp_init_futex_lock(lck); lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } void 
__kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { __kmp_destroy_futex_lock(lck); lck->lk.depth_locked = 0; } static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { char const *const func = "omp_destroy_nest_lock"; if (!__kmp_is_futex_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_futex_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_nested_futex_lock(lck); } #endif // KMP_USE_FUTEX /* ------------------------------------------------------------------------ */ /* ticket (bakery) locks */ static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { return std::atomic_load_explicit(&lck->lk.owner_id, std::memory_order_relaxed) - 1; } static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { return std::atomic_load_explicit(&lck->lk.depth_locked, std::memory_order_relaxed) != -1; } static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { return std::atomic_load_explicit((std::atomic *)now_serving, std::memory_order_acquire) == my_ticket; } __forceinline static int __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, kmp_int32 gtid) { kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed); #ifdef USE_LOCK_PROFILE if (std::atomic_load_explicit(&lck->lk.now_serving, std::memory_order_relaxed) != my_ticket) __kmp_printf("LOCK CONTENTION: %p\n", lck); /* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ if (std::atomic_load_explicit(&lck->lk.now_serving, std::memory_order_acquire) == my_ticket) { return KMP_LOCK_ACQUIRED_FIRST; } KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); ANNOTATE_TICKET_ACQUIRED(lck); return retval; } static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { KMP_FATAL(LockIsAlreadyOwned, func); } __kmp_acquire_ticket_lock(lck, gtid); std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, std::memory_order_relaxed); return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, std::memory_order_relaxed); if (std::atomic_load_explicit(&lck->lk.now_serving, std::memory_order_relaxed) == my_ticket) { kmp_uint32 next_ticket = my_ticket + 1; if (std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket, &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire)) { return TRUE; } } return FALSE; } static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } int retval = 
__kmp_test_ticket_lock(lck, gtid); if (retval) { std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, std::memory_order_relaxed); } return retval; } int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, std::memory_order_relaxed) - std::atomic_load_explicit(&lck->lk.now_serving, std::memory_order_relaxed); ANNOTATE_TICKET_RELEASED(lck); std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, std::memory_order_release); KMP_YIELD(distance > (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); return KMP_LOCK_RELEASED; } static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_ticket_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && (__kmp_get_ticket_lock_owner(lck) != gtid)) { KMP_FATAL(LockUnsettingSetByAnother, func); } std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); return __kmp_release_ticket_lock(lck, gtid); } void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { lck->lk.location = NULL; lck->lk.self = lck; std::atomic_store_explicit(&lck->lk.next_ticket, 0U, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.now_serving, 0U, std::memory_order_relaxed); std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed); // no thread owns the lock. std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed); // -1 => not a nested lock. 
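// ---------------------------------------------------------------------------
// Illustrative sketch: the ticket lock above boils down to two counters --
// take a ticket with fetch_add on next_ticket, spin until now_serving reaches
// it, and release by advancing now_serving. Unsigned wraparound is benign,
// which is what the wraparound check in __kmp_validate_locks anticipates.
// The toy_* names are hypothetical and not part of the runtime.
#include <atomic>
#include <thread>

struct toy_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};
};

static inline void toy_ticket_acquire(toy_ticket_lock *l) {
  // The only contended read-modify-write; waiting afterwards is read-only.
  unsigned my_ticket = l->next_ticket.fetch_add(1, std::memory_order_relaxed);
  while (l->now_serving.load(std::memory_order_acquire) != my_ticket)
    std::this_thread::yield(); // stand-in for KMP_WAIT_PTR / __kmp_bakery_check
}

static inline void toy_ticket_release(toy_ticket_lock *l) {
  // Hand off to the next ticket in FIFO order; the release ordering publishes
  // the critical section to the thread that observes the new value.
  l->now_serving.fetch_add(1, std::memory_order_release);
}
// ---------------------------------------------------------------------------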
std::atomic_store_explicit(&lck->lk.initialized, true, std::memory_order_release); } void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { std::atomic_store_explicit(&lck->lk.initialized, false, std::memory_order_release); lck->lk.self = NULL; lck->lk.location = NULL; std::atomic_store_explicit(&lck->lk.next_ticket, 0U, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.now_serving, 0U, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.depth_locked, -1, std::memory_order_relaxed); } static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { char const *const func = "omp_destroy_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_ticket_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_ticket_lock(lck); } // nested ticket locks int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_ticket_lock_owner(lck) == gtid) { std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, std::memory_order_relaxed); return KMP_LOCK_ACQUIRED_NEXT; } else { __kmp_acquire_ticket_lock_timed_template(lck, gtid); ANNOTATE_TICKET_ACQUIRED(lck); std::atomic_store_explicit(&lck->lk.depth_locked, 1, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, std::memory_order_relaxed); return KMP_LOCK_ACQUIRED_FIRST; } } static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_nest_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_acquire_nested_ticket_lock(lck, gtid); } int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { int retval; KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_ticket_lock_owner(lck) == gtid) { retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, std::memory_order_relaxed) + 1; } else if (!__kmp_test_ticket_lock(lck, gtid)) { retval = 0; } else { std::atomic_store_explicit(&lck->lk.depth_locked, 1, std::memory_order_relaxed); std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, std::memory_order_relaxed); retval = 1; } return retval; } static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_nest_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_test_nested_ticket_lock(lck, gtid); } int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, std::memory_order_relaxed) - 1) == 0) { std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); __kmp_release_ticket_lock(lck, gtid); return KMP_LOCK_RELEASED; } return KMP_LOCK_STILL_HELD; 
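// ---------------------------------------------------------------------------
// Illustrative sketch: every *_nested_* variant in this file follows the same
// recipe -- keep an owner id and a depth counter next to the base lock, let
// the owner re-acquire by bumping the depth, and only touch the base lock when
// the depth drops back to zero. A condensed standalone version (toy_* names
// hypothetical; the ownership validation done by the *_with_checks wrappers
// is omitted):
#include <atomic>

struct toy_nestable_lock {
  std::atomic<bool> base{false};   // the underlying simple lock
  std::atomic<int> owner_gtid{-1}; // -1 == unowned
  int depth{0};                    // only touched while owned
};

static void toy_nest_acquire(toy_nestable_lock *l, int gtid) {
  if (l->owner_gtid.load(std::memory_order_relaxed) == gtid) {
    ++l->depth; // re-entrant acquire by the owner: no atomics needed
    return;
  }
  while (l->base.exchange(true, std::memory_order_acquire))
    ; // spin on the underlying simple lock
  l->owner_gtid.store(gtid, std::memory_order_relaxed);
  l->depth = 1;
}

static int toy_nest_release(toy_nestable_lock *l) {
  if (--l->depth != 0)
    return 0; // still held (cf. KMP_LOCK_STILL_HELD)
  l->owner_gtid.store(-1, std::memory_order_relaxed);
  l->base.store(false, std::memory_order_release);
  return 1; // released (cf. KMP_LOCK_RELEASED)
}
// ---------------------------------------------------------------------------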
} static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_nest_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_ticket_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_ticket_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_nested_ticket_lock(lck, gtid); } void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { __kmp_init_ticket_lock(lck); std::atomic_store_explicit(&lck->lk.depth_locked, 0, std::memory_order_relaxed); // >= 0 for nestable locks, -1 for simple locks } void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { __kmp_destroy_ticket_lock(lck); std::atomic_store_explicit(&lck->lk.depth_locked, 0, std::memory_order_relaxed); } static void __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { char const *const func = "omp_destroy_nest_lock"; if (!std::atomic_load_explicit(&lck->lk.initialized, std::memory_order_relaxed)) { KMP_FATAL(LockIsUninitialized, func); } if (lck->lk.self != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_ticket_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_ticket_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_nested_ticket_lock(lck); } // access functions to fields which don't exist for all lock kinds. static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { return lck->lk.location; } static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, const ident_t *loc) { lck->lk.location = loc; } static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { return lck->lk.flags; } static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, kmp_lock_flags_t flags) { lck->lk.flags = flags; } /* ------------------------------------------------------------------------ */ /* queuing locks */ /* First the states (head,tail) = 0, 0 means lock is unheld, nobody on queue UINT_MAX or -1, 0 means lock is held, nobody on queue h, h means lock held or about to transition, 1 element on queue h, t h <> t, means lock is held or about to transition, >1 elements on queue Now the transitions Acquire(0,0) = -1 ,0 Release(0,0) = Error Acquire(-1,0) = h ,h h > 0 Release(-1,0) = 0 ,0 Acquire(h,h) = h ,t h > 0, t > 0, h <> t Release(h,h) = -1 ,0 h > 0 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t And pictorially +-----+ | 0, 0|------- release -------> Error +-----+ | ^ acquire| |release | | | | v | +-----+ |-1, 0| +-----+ | ^ acquire| |release | | | | v | +-----+ | h, h| +-----+ | ^ acquire| |release | | | | v | +-----+ | h, t|----- acquire, release loopback ---+ +-----+ | ^ | | | +------------------------------------+ */ #ifdef DEBUG_QUEUING_LOCKS /* Stuff for circular trace buffer */ #define TRACE_BUF_ELE 1024 static char traces[TRACE_BUF_ELE][128] = {0}; static int tc = 0; #define TRACE_LOCK(X, Y) \ KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); #define TRACE_LOCK_T(X, Y, Z) \ KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); #define TRACE_LOCK_HT(X, Y, Z, Q) \ KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, 
"t%d at %s %d,%d\n", X, Y, \ Z, Q); static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id) { kmp_int32 t, i; __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n"); i = tc % TRACE_BUF_ELE; __kmp_printf_no_lock("%s\n", traces[i]); i = (i + 1) % TRACE_BUF_ELE; while (i != (tc % TRACE_BUF_ELE)) { __kmp_printf_no_lock("%s", traces[i]); i = (i + 1) % TRACE_BUF_ELE; } __kmp_printf_no_lock("\n"); __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " "next_wait:%d, head_id:%d, tail_id:%d\n", gtid + 1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, head_id, tail_id); __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); if (lck->lk.head_id >= 1) { t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; while (t > 0) { __kmp_printf_no_lock("-> %d ", t); t = __kmp_threads[t - 1]->th.th_next_waiting; } } __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); __kmp_printf_no_lock("\n\n"); } #endif /* DEBUG_QUEUING_LOCKS */ static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { return TCR_4(lck->lk.owner_id) - 1; } static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { return lck->lk.depth_locked != -1; } /* Acquire a lock using a the queuing lock implementation */ template /* [TLW] The unused template above is left behind because of what BEB believes is a potential compiler problem with __forceinline. */ __forceinline static int __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, kmp_int32 gtid) { kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); volatile kmp_int32 *head_id_p = &lck->lk.head_id; volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; volatile kmp_uint32 *spin_here_p; kmp_int32 need_mf = 1; #if OMPT_SUPPORT ompt_state_t prev_state = ompt_state_undefined; #endif KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); KMP_FSYNC_PREPARE(lck); KMP_DEBUG_ASSERT(this_thr != NULL); spin_here_p = &this_thr->th.th_spin_here; #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "acq ent"); if (*spin_here_p) __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); if (this_thr->th.th_next_waiting != 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif KMP_DEBUG_ASSERT(!*spin_here_p); KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p that may follow, not just in execution order, but also in visibility order. This way, when a releasing thread observes the changes to the queue by this thread, it can rightly assume that spin_here_p has already been set to TRUE, so that when it sets spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p to FALSE before this thread sets it to TRUE, this thread will hang. 
*/ *spin_here_p = TRUE; /* before enqueuing to prevent race */ while (1) { kmp_int32 enqueued; kmp_int32 head; kmp_int32 tail; head = *head_id_p; switch (head) { case -1: { #ifdef DEBUG_QUEUING_LOCKS tail = *tail_id_p; TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif tail = 0; /* to make sure next link asynchronously read is not set accidentally; this assignment prevents us from entering the if ( t > 0 ) condition in the enqueued case below, which is not necessary for this state transition */ need_mf = 0; /* try (-1,0)->(tid,tid) */ enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, KMP_PACK_64(-1, 0), KMP_PACK_64(gtid + 1, gtid + 1)); #ifdef DEBUG_QUEUING_LOCKS if (enqueued) TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); #endif } break; default: { tail = *tail_id_p; KMP_DEBUG_ASSERT(tail != gtid + 1); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif if (tail == 0) { enqueued = FALSE; } else { need_mf = 0; /* try (h,t) or (h,h)->(h,tid) */ enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); #ifdef DEBUG_QUEUING_LOCKS if (enqueued) TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); #endif } } break; case 0: /* empty queue */ { kmp_int32 grabbed_lock; #ifdef DEBUG_QUEUING_LOCKS tail = *tail_id_p; TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif /* try (0,0)->(-1,0) */ /* only legal transition out of head = 0 is head = -1 with no change to * tail */ grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); if (grabbed_lock) { *spin_here_p = FALSE; KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); #endif #if OMPT_SUPPORT if (ompt_enabled.enabled && prev_state != ompt_state_undefined) { /* change the state before clearing wait_id */ this_thr->th.ompt_thread_info.state = prev_state; this_thr->th.ompt_thread_info.wait_id = 0; } #endif KMP_FSYNC_ACQUIRED(lck); return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ } enqueued = FALSE; } break; } #if OMPT_SUPPORT if (ompt_enabled.enabled && prev_state == ompt_state_undefined) { /* this thread will spin; set wait_id before entering wait state */ prev_state = this_thr->th.ompt_thread_info.state; this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; } #endif if (enqueued) { if (tail > 0) { kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); KMP_ASSERT(tail_thr != NULL); tail_thr->th.th_next_waiting = gtid + 1; /* corresponding wait for this write in release code */ } KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid)); KMP_MB(); // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck); // Synchronize writes to both runtime thread structures // and writes in user code. 
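// ---------------------------------------------------------------------------
// Illustrative sketch: the queuing lock's key property -- each waiter spins on
// its own flag (th_spin_here) and the releaser wakes exactly the queue head --
// is the same idea as an MCS queue lock. A minimal MCS variant with
// hypothetical toy_* names (the runtime queues by gtid inside the thread
// structures rather than by per-call nodes):
#include <atomic>

struct toy_mcs_node {
  std::atomic<toy_mcs_node *> next{nullptr};
  std::atomic<bool> locked{false};
};

struct toy_mcs_lock {
  std::atomic<toy_mcs_node *> tail{nullptr};
};

static void toy_mcs_acquire(toy_mcs_lock *l, toy_mcs_node *me) {
  me->next.store(nullptr, std::memory_order_relaxed);
  me->locked.store(true, std::memory_order_relaxed);
  // Swing the tail to ourselves; whoever was there before is our predecessor.
  toy_mcs_node *pred = l->tail.exchange(me, std::memory_order_acq_rel);
  if (pred == nullptr)
    return; // queue was empty: we hold the lock
  pred->next.store(me, std::memory_order_release);
  while (me->locked.load(std::memory_order_acquire))
    ; // spin on our own flag only
}

static void toy_mcs_release(toy_mcs_lock *l, toy_mcs_node *me) {
  toy_mcs_node *succ = me->next.load(std::memory_order_acquire);
  if (succ == nullptr) {
    // Nobody visibly queued: try to swing the tail back to empty.
    toy_mcs_node *expected = me;
    if (l->tail.compare_exchange_strong(expected, nullptr,
                                        std::memory_order_acq_rel))
      return;
    // A new waiter is mid-enqueue; wait for it to link itself behind us.
    while ((succ = me->next.load(std::memory_order_acquire)) == nullptr)
      ;
  }
  succ->locked.store(false, std::memory_order_release); // hand the lock off
}
// ---------------------------------------------------------------------------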
KMP_MB(); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "acq spin"); if (this_thr->th.th_next_waiting != 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " "waiting on queue\n", lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "acq exit 2"); #endif #if OMPT_SUPPORT /* change the state before clearing wait_id */ this_thr->th.ompt_thread_info.state = prev_state; this_thr->th.ompt_thread_info.wait_id = 0; #endif /* got lock, we were dequeued by the thread that released lock */ return KMP_LOCK_ACQUIRED_FIRST; } /* Yield if number of threads > number of logical processors */ /* ToDo: Not sure why this should only be in oversubscription case, maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ KMP_YIELD_OVERSUB(); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "acq retry"); #endif } KMP_ASSERT2(0, "should not get here"); return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); int retval = __kmp_acquire_queuing_lock_timed_template(lck, gtid); ANNOTATE_QUEUING_ACQUIRED(lck); return retval; } static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_queuing_lock_owner(lck) == gtid) { KMP_FATAL(LockIsAlreadyOwned, func); } __kmp_acquire_queuing_lock(lck, gtid); lck->lk.owner_id = gtid + 1; return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { volatile kmp_int32 *head_id_p = &lck->lk.head_id; kmp_int32 head; #ifdef KMP_DEBUG kmp_info_t *this_thr; #endif KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); KMP_DEBUG_ASSERT(gtid >= 0); #ifdef KMP_DEBUG this_thr = __kmp_thread_from_gtid(gtid); KMP_DEBUG_ASSERT(this_thr != NULL); KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); #endif head = *head_id_p; if (head == 0) { /* nobody on queue, nobody holding */ /* try (0,0)->(-1,0) */ if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); KMP_FSYNC_ACQUIRED(lck); ANNOTATE_QUEUING_ACQUIRED(lck); return TRUE; } } KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid)); return FALSE; } static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } int retval = __kmp_test_queuing_lock(lck, gtid); if (retval) { lck->lk.owner_id = gtid + 1; } return retval; } int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { kmp_info_t *this_thr; volatile kmp_int32 *head_id_p = &lck->lk.head_id; volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); KMP_DEBUG_ASSERT(gtid >= 0); this_thr = __kmp_thread_from_gtid(gtid); KMP_DEBUG_ASSERT(this_thr != NULL); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel ent"); if (this_thr->th.th_spin_here) __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); if 
(this_thr->th.th_next_waiting != 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); KMP_FSYNC_RELEASING(lck); ANNOTATE_QUEUING_RELEASED(lck); while (1) { kmp_int32 dequeued; kmp_int32 head; kmp_int32 tail; head = *head_id_p; #ifdef DEBUG_QUEUING_LOCKS tail = *tail_id_p; TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); if (head == 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif KMP_DEBUG_ASSERT(head != 0); /* holding the lock, head must be -1 or queue head */ if (head == -1) { /* nobody on queue */ /* try (-1,0)->(0,0) */ if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); #endif #if OMPT_SUPPORT /* nothing to do - no other thread is trying to shift blame */ #endif return KMP_LOCK_RELEASED; } dequeued = FALSE; } else { KMP_MB(); tail = *tail_id_p; if (head == tail) { /* only one thread on the queue */ #ifdef DEBUG_QUEUING_LOCKS if (head <= 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif KMP_DEBUG_ASSERT(head > 0); /* try (h,h)->(-1,0) */ dequeued = KMP_COMPARE_AND_STORE_REL64( RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), KMP_PACK_64(-1, 0)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); #endif } else { volatile kmp_int32 *waiting_id_p; kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); KMP_DEBUG_ASSERT(head_thr != NULL); waiting_id_p = &head_thr->th.th_next_waiting; /* Does this require synchronous reads? */ #ifdef DEBUG_QUEUING_LOCKS if (head <= 0 || tail <= 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif KMP_DEBUG_ASSERT(head > 0 && tail > 0); /* try (h,t)->(h',t) or (t,t) */ KMP_MB(); /* make sure enqueuing thread has time to update next waiting thread * field */ *head_id_p = KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); #endif dequeued = TRUE; } } if (dequeued) { kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); KMP_DEBUG_ASSERT(head_thr != NULL); /* Does this require synchronous reads? */ #ifdef DEBUG_QUEUING_LOCKS if (head <= 0 || tail <= 0) __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif KMP_DEBUG_ASSERT(head > 0 && tail > 0); /* For clean code only. Thread not released until next statement prevents race with acquire code. 
*/ head_thr->th.th_next_waiting = 0; #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); #endif KMP_MB(); /* reset spin value */ head_thr->th.th_spin_here = FALSE; KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " "dequeuing\n", lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel exit 2"); #endif return KMP_LOCK_RELEASED; } /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring threads */ #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel retry"); #endif } /* while */ KMP_ASSERT2(0, "should not get here"); return KMP_LOCK_RELEASED; } static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; KMP_MB(); /* in case another processor initialized lock */ if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_queuing_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_queuing_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } lck->lk.owner_id = 0; return __kmp_release_queuing_lock(lck, gtid); } void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { lck->lk.location = NULL; lck->lk.head_id = 0; lck->lk.tail_id = 0; lck->lk.next_ticket = 0; lck->lk.now_serving = 0; lck->lk.owner_id = 0; // no thread owns the lock. lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. lck->lk.initialized = lck; KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); } void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { lck->lk.initialized = NULL; lck->lk.location = NULL; lck->lk.head_id = 0; lck->lk.tail_id = 0; lck->lk.next_ticket = 0; lck->lk.now_serving = 0; lck->lk.owner_id = 0; lck->lk.depth_locked = -1; } static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { char const *const func = "omp_destroy_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_queuing_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_queuing_lock(lck); } // nested queuing locks int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_queuing_lock_owner(lck) == gtid) { lck->lk.depth_locked += 1; return KMP_LOCK_ACQUIRED_NEXT; } else { __kmp_acquire_queuing_lock_timed_template(lck, gtid); ANNOTATE_QUEUING_ACQUIRED(lck); KMP_MB(); lck->lk.depth_locked = 1; KMP_MB(); lck->lk.owner_id = gtid + 1; return KMP_LOCK_ACQUIRED_FIRST; } } static int __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_acquire_nested_queuing_lock(lck, gtid); } int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { int retval; KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_queuing_lock_owner(lck) == gtid) { retval = ++lck->lk.depth_locked; } else if (!__kmp_test_queuing_lock(lck, gtid)) { retval = 0; } else { KMP_MB(); retval = lck->lk.depth_locked = 1; KMP_MB(); lck->lk.owner_id = gtid + 1; } return retval; } static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 
kmp_int32 gtid) { char const *const func = "omp_test_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_test_nested_queuing_lock(lck, gtid); } int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); KMP_MB(); if (--(lck->lk.depth_locked) == 0) { KMP_MB(); lck->lk.owner_id = 0; __kmp_release_queuing_lock(lck, gtid); return KMP_LOCK_RELEASED; } return KMP_LOCK_STILL_HELD; } static int __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_nest_lock"; KMP_MB(); /* in case another processor initialized lock */ if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_queuing_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_queuing_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_nested_queuing_lock(lck, gtid); } void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { __kmp_init_queuing_lock(lck); lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { __kmp_destroy_queuing_lock(lck); lck->lk.depth_locked = 0; } static void __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { char const *const func = "omp_destroy_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_queuing_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_queuing_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_nested_queuing_lock(lck); } // access functions to fields which don't exist for all lock kinds. static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { return lck->lk.location; } static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, const ident_t *loc) { lck->lk.location = loc; } static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { return lck->lk.flags; } static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, kmp_lock_flags_t flags) { lck->lk.flags = flags; } #if KMP_USE_ADAPTIVE_LOCKS /* RTM Adaptive locks */ #if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \ (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \ - (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT) + (KMP_COMPILER_CLANG && (KMP_MSVC_COMPAT || __MINGW32__)) || \ + (KMP_COMPILER_GCC && __MINGW32__) #include #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) #else // Values from the status register after failed speculation. #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) #define _XABORT_CONFLICT (1 << 2) #define _XABORT_CAPACITY (1 << 3) #define _XABORT_DEBUG (1 << 4) #define _XABORT_NESTED (1 << 5) #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) // Aborts for which it's worth trying again immediately #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) #define STRINGIZE_INTERNAL(arg) #arg #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) // Access to RTM instructions /*A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 
This is the same definition as the compiler intrinsic that will be supported at some point. */ static __inline int _xbegin() { int res = -1; #if KMP_OS_WINDOWS #if KMP_ARCH_X86_64 _asm { _emit 0xC7 _emit 0xF8 _emit 2 _emit 0 _emit 0 _emit 0 jmp L2 mov res, eax L2: } #else /* IA32 */ _asm { _emit 0xC7 _emit 0xF8 _emit 2 _emit 0 _emit 0 _emit 0 jmp L2 mov res, eax L2: } #endif // KMP_ARCH_X86_64 #else /* Note that %eax must be noted as killed (clobbered), because the XSR is returned in %eax(%rax) on abort. Other register values are restored, so don't need to be killed. We must also mark 'res' as an input and an output, since otherwise 'res=-1' may be dropped as being dead, whereas we do need the assignment on the successful (i.e., non-abort) path. */ __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" " .long 1f-1b-6\n" " jmp 2f\n" "1: movl %%eax,%0\n" "2:" : "+r"(res)::"memory", "%eax"); #endif // KMP_OS_WINDOWS return res; } /* Transaction end */ static __inline void _xend() { #if KMP_OS_WINDOWS __asm { _emit 0x0f _emit 0x01 _emit 0xd5 } #else __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); #endif } /* This is a macro, the argument must be a single byte constant which can be evaluated by the inline assembler, since it is emitted as a byte into the assembly code. */ // clang-format off #if KMP_OS_WINDOWS #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG #else #define _xabort(ARG) \ __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); #endif // clang-format on #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 // Statistics is collected for testing purpose #if KMP_DEBUG_ADAPTIVE_LOCKS // We accumulate speculative lock statistics when the lock is destroyed. We // keep locks that haven't been destroyed in the liveLocks list so that we can // grab their statistics too. static kmp_adaptive_lock_statistics_t destroyedStats; // To hold the list of live locks. static kmp_adaptive_lock_info_t liveLocks; // A lock so we can safely update the list of locks. static kmp_bootstrap_lock_t chain_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock); // Initialize the list of stats. 
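// ---------------------------------------------------------------------------
// Illustrative sketch: the canonical way the _xbegin/_xend/_xabort primitives
// above are used for lock elision -- start a transaction, read the real lock
// to pull it into the read-set, run the critical section speculatively, and
// commit at release time; fall back to the real lock when speculation keeps
// failing. Assumes an RTM-capable compiler/target (<immintrin.h>, e.g. -mrtm);
// the toy_* names are hypothetical, and this is not the adaptive lock's exact
// policy (that is what badness/max_soft_retries add below).
#include <atomic>
#include <immintrin.h>

static std::atomic<int> toy_fallback_lock{0}; // stands in for the base lock

static inline bool toy_fallback_is_free() {
  return toy_fallback_lock.load(std::memory_order_relaxed) == 0;
}

static void toy_elided_acquire(int max_retries) {
  for (int i = 0; i < max_retries; ++i) {
    unsigned status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      // Reading the fallback lock puts it in our read-set: if any thread later
      // takes it for real, our transaction aborts automatically.
      if (toy_fallback_is_free())
        return;      // run the critical section speculatively
      _xabort(0x01); // lock is genuinely held: abort and retry or fall back
    }
    // Here 'status' holds the abort reason; a fuller version would test it
    // against a mask like SOFT_ABORT_MASK before retrying.
  }
  // Speculation did not pay off: take the lock for real.
  int expected = 0;
  while (!toy_fallback_lock.compare_exchange_weak(expected, 1,
                                                  std::memory_order_acquire))
    expected = 0;
}

static void toy_elided_release() {
  if (toy_fallback_is_free())
    _xend(); // we were speculating: commit the whole critical section
  else
    toy_fallback_lock.store(0, std::memory_order_release); // real unlock
}
// ---------------------------------------------------------------------------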
void __kmp_init_speculative_stats() { kmp_adaptive_lock_info_t *lck = &liveLocks; memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, sizeof(lck->stats)); lck->stats.next = lck; lck->stats.prev = lck; KMP_ASSERT(lck->stats.next->stats.prev == lck); KMP_ASSERT(lck->stats.prev->stats.next == lck); __kmp_init_bootstrap_lock(&chain_lock); } // Insert the lock into the circular list static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { __kmp_acquire_bootstrap_lock(&chain_lock); lck->stats.next = liveLocks.stats.next; lck->stats.prev = &liveLocks; liveLocks.stats.next = lck; lck->stats.next->stats.prev = lck; KMP_ASSERT(lck->stats.next->stats.prev == lck); KMP_ASSERT(lck->stats.prev->stats.next == lck); __kmp_release_bootstrap_lock(&chain_lock); } static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { KMP_ASSERT(lck->stats.next->stats.prev == lck); KMP_ASSERT(lck->stats.prev->stats.next == lck); kmp_adaptive_lock_info_t *n = lck->stats.next; kmp_adaptive_lock_info_t *p = lck->stats.prev; n->stats.prev = p; p->stats.next = n; } static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, sizeof(lck->stats)); __kmp_remember_lock(lck); } static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, kmp_adaptive_lock_info_t *lck) { kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; t->successfulSpeculations += s->successfulSpeculations; t->hardFailedSpeculations += s->hardFailedSpeculations; t->softFailedSpeculations += s->softFailedSpeculations; t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; t->lemmingYields += s->lemmingYields; } static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { __kmp_acquire_bootstrap_lock(&chain_lock); __kmp_add_stats(&destroyedStats, lck); __kmp_forget_lock(lck); __kmp_release_bootstrap_lock(&chain_lock); } static float percent(kmp_uint32 count, kmp_uint32 total) { return (total == 0) ? 0.0 : (100.0 * count) / total; } static FILE *__kmp_open_stats_file() { if (strcmp(__kmp_speculative_statsfile, "-") == 0) return stdout; size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; char buffer[buffLen]; KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, (kmp_int32)getpid()); FILE *result = fopen(&buffer[0], "w"); // Maybe we should issue a warning here... return result ? 
result : stdout; } void __kmp_print_speculative_stats() { kmp_adaptive_lock_statistics_t total = destroyedStats; kmp_adaptive_lock_info_t *lck; for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { __kmp_add_stats(&total, lck); } kmp_adaptive_lock_statistics_t *t = &total; kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + t->softFailedSpeculations; if (totalSections <= 0) return; FILE *statsFile = __kmp_open_stats_file(); fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); fprintf(statsFile, " Lock parameters: \n" " max_soft_retries : %10d\n" " max_badness : %10d\n", __kmp_adaptive_backoff_params.max_soft_retries, __kmp_adaptive_backoff_params.max_badness); fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts); fprintf(statsFile, " Total critical sections : %10d\n", totalSections); fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", t->successfulSpeculations, percent(t->successfulSpeculations, totalSections)); fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", t->nonSpeculativeAcquires, percent(t->nonSpeculativeAcquires, totalSections)); fprintf(statsFile, " Lemming yields : %10d\n\n", t->lemmingYields); fprintf(statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations); fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", t->successfulSpeculations, percent(t->successfulSpeculations, totalSpeculations)); fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", t->softFailedSpeculations, percent(t->softFailedSpeculations, totalSpeculations)); fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", t->hardFailedSpeculations, percent(t->hardFailedSpeculations, totalSpeculations)); if (statsFile != stdout) fclose(statsFile); } #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) #else #define KMP_INC_STAT(lck, stat) #endif // KMP_DEBUG_ADAPTIVE_LOCKS static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { // It is enough to check that the head_id is zero. // We don't also need to check the tail. bool res = lck->lk.head_id == 0; // We need a fence here, since we must ensure that no memory operations // from later in this thread float above that read. #if KMP_COMPILER_ICC _mm_mfence(); #else __sync_synchronize(); #endif return res; } // Functions for manipulating the badness static __inline void __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { // Reset the badness to zero so we eagerly try to speculate again lck->lk.adaptive.badness = 0; KMP_INC_STAT(lck, successfulSpeculations); } // Create a bit mask with one more set bit. static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; if (newBadness > lck->lk.adaptive.max_badness) { return; } else { lck->lk.adaptive.badness = newBadness; } } // Check whether speculation should be attempted. static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { kmp_uint32 badness = lck->lk.adaptive.badness; kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; int res = (attempts & badness) == 0; return res; } // Attempt to acquire only the speculative lock. // Does not back off to the non-speculative lock. 
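// ---------------------------------------------------------------------------
// Illustrative sketch: the badness field used by __kmp_should_speculate and
// __kmp_step_badness above is a mask of low bits, so (attempts & badness) == 0
// retries speculation only on every 2^k-th acquire attempt -- exponentially
// less often as failures pile up, and immediately again once a success resets
// badness to zero. The toy_* helpers are hypothetical.
#include <cassert>
#include <cstdint>

static bool toy_should_speculate(uint32_t attempts, uint32_t badness) {
  return (attempts & badness) == 0;
}

static uint32_t toy_step_badness(uint32_t badness, uint32_t max_badness) {
  uint32_t next = (badness << 1) | 1; // one more low bit set after a failure
  return next > max_badness ? badness : next;
}

static void toy_badness_demo() {
  assert(toy_should_speculate(5, 0));           // badness 0: always try
  assert(toy_should_speculate(8, 0x3));         // badness 0b11: every 4th attempt
  assert(!toy_should_speculate(9, 0x3));
  assert(toy_step_badness(0x3, 0xFF) == 0x7);   // grow after another failure
  assert(toy_step_badness(0xFF, 0xFF) == 0xFF); // but never past max_badness
}
// ---------------------------------------------------------------------------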
static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { int retries = lck->lk.adaptive.max_soft_retries; // We don't explicitly count the start of speculation, rather we record the // results (success, hard fail, soft fail). The sum of all of those is the // total number of times we started speculation since all speculations must // end one of those ways. do { kmp_uint32 status = _xbegin(); // Switch this in to disable actual speculation but exercise at least some // of the rest of the code. Useful for debugging... // kmp_uint32 status = _XABORT_NESTED; if (status == _XBEGIN_STARTED) { /* We have successfully started speculation. Check that no-one acquired the lock for real between when we last looked and now. This also gets the lock cache line into our read-set, which we need so that we'll abort if anyone later claims it for real. */ if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { // Lock is now visibly acquired, so someone beat us to it. Abort the // transaction so we'll restart from _xbegin with the failure status. _xabort(0x01); KMP_ASSERT2(0, "should not get here"); } return 1; // Lock has been acquired (speculatively) } else { // We have aborted, update the statistics if (status & SOFT_ABORT_MASK) { KMP_INC_STAT(lck, softFailedSpeculations); // and loop round to retry. } else { KMP_INC_STAT(lck, hardFailedSpeculations); // Give up if we had a hard failure. break; } } } while (retries--); // Loop while we have retries, and didn't fail hard. // Either we had a hard failure or we didn't succeed softly after // the full set of attempts, so back off the badness. __kmp_step_badness(lck); return 0; } // Attempt to acquire the speculative lock, or back off to the non-speculative // one if the speculative lock cannot be acquired. // We can succeed speculatively, non-speculatively, or fail. static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { // First try to acquire the lock speculatively if (__kmp_should_speculate(lck, gtid) && __kmp_test_adaptive_lock_only(lck, gtid)) return 1; // Speculative acquisition failed, so try to acquire it non-speculatively. // Count the non-speculative acquire attempt lck->lk.adaptive.acquire_attempts++; // Use base, non-speculative lock. if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { KMP_INC_STAT(lck, nonSpeculativeAcquires); return 1; // Lock is acquired (non-speculatively) } else { return 0; // Failed to acquire the lock, it's already visibly locked. } } static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { KMP_FATAL(LockIsUninitialized, func); } int retval = __kmp_test_adaptive_lock(lck, gtid); if (retval) { lck->lk.qlk.owner_id = gtid + 1; } return retval; } // Block until we can acquire a speculative, adaptive lock. We check whether we // should be trying to speculate. If we should be, we check the real lock to see // if it is free, and, if not, pause without attempting to acquire it until it // is. Then we try the speculative acquire. This means that although we suffer // from lemmings a little (because all we can't acquire the lock speculatively // until the queue of threads waiting has cleared), we don't get into a state // where we can never acquire the lock speculatively (because we force the queue // to clear by preventing new arrivals from entering the queue). This does mean // that when we're trying to break lemmings, the lock is no longer fair. 
However // OpenMP makes no guarantee that its locks are fair, so this isn't a real // problem. static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { if (__kmp_should_speculate(lck, gtid)) { if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { if (__kmp_test_adaptive_lock_only(lck, gtid)) return; // We tried speculation and failed, so give up. } else { // We can't try speculation until the lock is free, so we pause here // (without suspending on the queueing lock, to allow it to drain, then // try again. All other threads will also see the same result for // shouldSpeculate, so will be doing the same if they try to claim the // lock from now on. while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { KMP_INC_STAT(lck, lemmingYields); KMP_YIELD(TRUE); } if (__kmp_test_adaptive_lock_only(lck, gtid)) return; } } // Speculative acquisition failed, so acquire it non-speculatively. // Count the non-speculative acquire attempt lck->lk.adaptive.acquire_attempts++; __kmp_acquire_queuing_lock_timed_template(GET_QLK_PTR(lck), gtid); // We have acquired the base lock, so count that. KMP_INC_STAT(lck, nonSpeculativeAcquires); ANNOTATE_QUEUING_ACQUIRED(lck); } static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) { KMP_FATAL(LockIsAlreadyOwned, func); } __kmp_acquire_adaptive_lock(lck, gtid); lck->lk.qlk.owner_id = gtid + 1; } static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR( lck))) { // If the lock doesn't look claimed we must be speculating. // (Or the user's code is buggy and they're releasing without locking; // if we had XTEST we'd be able to check that case...) _xend(); // Exit speculation __kmp_update_badness_after_success(lck); } else { // Since the lock *is* visibly locked we're not speculating, // so should use the underlying lock's release scheme. 
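// ---------------------------------------------------------------------------
// Illustrative sketch: the comment above notes that XTEST would let the
// release path tell "we are inside a speculative region" apart from "the
// caller never locked anything". With the _xtest() intrinsic (RTM-capable
// compiler/target, <immintrin.h>) that check could look like this; the toy_*
// names are hypothetical, and the runtime instead infers speculation from the
// base lock appearing unowned.
#include <atomic>
#include <immintrin.h>

static inline bool toy_in_transaction() {
  return _xtest() != 0; // nonzero iff executing inside an RTM transaction
}

static void toy_elided_release_checked(std::atomic<int> *fallback) {
  if (toy_in_transaction())
    _xend(); // commit the speculative critical section
  else
    fallback->store(0, std::memory_order_release); // ordinary unlock
}
// ---------------------------------------------------------------------------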
__kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); } return KMP_LOCK_RELEASED; } static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; KMP_MB(); /* in case another processor initialized lock */ if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } lck->lk.qlk.owner_id = 0; __kmp_release_adaptive_lock(lck, gtid); return KMP_LOCK_RELEASED; } static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { __kmp_init_queuing_lock(GET_QLK_PTR(lck)); lck->lk.adaptive.badness = 0; lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; #if KMP_DEBUG_ADAPTIVE_LOCKS __kmp_zero_speculative_stats(&lck->lk.adaptive); #endif KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); } static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { #if KMP_DEBUG_ADAPTIVE_LOCKS __kmp_accumulate_speculative_stats(&lck->lk.adaptive); #endif __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); // Nothing needed for the speculative part. } static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { char const *const func = "omp_destroy_lock"; if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_adaptive_lock(lck); } #endif // KMP_USE_ADAPTIVE_LOCKS /* ------------------------------------------------------------------------ */ /* DRDPA ticket locks */ /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { return lck->lk.owner_id - 1; } static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { return lck->lk.depth_locked != -1; } __forceinline static int __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket); kmp_uint64 mask = lck->lk.mask; // atomic load std::atomic *polls = lck->lk.polls; #ifdef USE_LOCK_PROFILE if (polls[ticket & mask] != ticket) __kmp_printf("LOCK CONTENTION: %p\n", lck); /* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ // Now spin-wait, but reload the polls pointer and mask, in case the // polling area has been reconfigured. Unless it is reconfigured, the // reloads stay in L1 cache and are cheap. // // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!! // The current implementation of KMP_WAIT doesn't allow for mask // and poll to be re-read every spin iteration. kmp_uint32 spins; KMP_FSYNC_PREPARE(lck); KMP_INIT_YIELD(spins); while (polls[ticket & mask] < ticket) { // atomic load KMP_YIELD_OVERSUB_ELSE_SPIN(spins); // Re-read the mask and the poll pointer from the lock structure. // // Make certain that "mask" is read before "polls" !!! // // If another thread picks reconfigures the polling area and updates their // values, and we get the new value of mask and the old polls pointer, we // could access memory beyond the end of the old polling area. 
mask = lck->lk.mask; // atomic load polls = lck->lk.polls; // atomic load } // Critical section starts here KMP_FSYNC_ACQUIRED(lck); KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", ticket, lck)); lck->lk.now_serving = ticket; // non-volatile store // Deallocate a garbage polling area if we know that we are the last // thread that could possibly access it. // // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup // ticket. if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { __kmp_free(lck->lk.old_polls); lck->lk.old_polls = NULL; lck->lk.cleanup_ticket = 0; } // Check to see if we should reconfigure the polling area. // If there is still a garbage polling area to be deallocated from a // previous reconfiguration, let a later thread reconfigure it. if (lck->lk.old_polls == NULL) { bool reconfigure = false; std::atomic *old_polls = polls; kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { // We are in oversubscription mode. Contract the polling area // down to a single location, if that hasn't been done already. if (num_polls > 1) { reconfigure = true; num_polls = TCR_4(lck->lk.num_polls); mask = 0; num_polls = 1; polls = (std::atomic *)__kmp_allocate(num_polls * sizeof(*polls)); polls[0] = ticket; } } else { // We are in under/fully subscribed mode. Check the number of // threads waiting on the lock. The size of the polling area // should be at least the number of threads waiting. kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; if (num_waiting > num_polls) { kmp_uint32 old_num_polls = num_polls; reconfigure = true; do { mask = (mask << 1) | 1; num_polls *= 2; } while (num_polls <= num_waiting); // Allocate the new polling area, and copy the relevant portion // of the old polling area to the new area. __kmp_allocate() // zeroes the memory it allocates, and most of the old area is // just zero padding, so we only copy the release counters. polls = (std::atomic *)__kmp_allocate(num_polls * sizeof(*polls)); kmp_uint32 i; for (i = 0; i < old_num_polls; i++) { polls[i].store(old_polls[i]); } } } if (reconfigure) { // Now write the updated fields back to the lock structure. // // Make certain that "polls" is written before "mask" !!! // // If another thread picks up the new value of mask and the old polls // pointer , it could access memory beyond the end of the old polling // area. // // On x86, we need memory fences. KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " "lock %p to %d polls\n", ticket, lck, num_polls)); lck->lk.old_polls = old_polls; lck->lk.polls = polls; // atomic store KMP_MB(); lck->lk.num_polls = num_polls; lck->lk.mask = mask; // atomic store KMP_MB(); // Only after the new polling area and mask have been flushed // to main memory can we update the cleanup ticket field. 
// // volatile load / non-volatile store lck->lk.cleanup_ticket = lck->lk.next_ticket; } } return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); ANNOTATE_DRDPA_ACQUIRED(lck); return retval; } static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { KMP_FATAL(LockIsAlreadyOwned, func); } __kmp_acquire_drdpa_lock(lck, gtid); lck->lk.owner_id = gtid + 1; return KMP_LOCK_ACQUIRED_FIRST; } int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { // First get a ticket, then read the polls pointer and the mask. // The polls pointer must be read before the mask!!! (See above) kmp_uint64 ticket = lck->lk.next_ticket; // atomic load std::atomic *polls = lck->lk.polls; kmp_uint64 mask = lck->lk.mask; // atomic load if (polls[ticket & mask] == ticket) { kmp_uint64 next_ticket = ticket + 1; if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket, next_ticket)) { KMP_FSYNC_ACQUIRED(lck); KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", ticket, lck)); lck->lk.now_serving = ticket; // non-volatile store // Since no threads are waiting, there is no possibility that we would // want to reconfigure the polling area. We might have the cleanup ticket // value (which says that it is now safe to deallocate old_polls), but // we'll let a later thread which calls __kmp_acquire_lock do that - this // routine isn't supposed to block, and we would risk blocks if we called // __kmp_free() to do the deallocation. return TRUE; } } return FALSE; } static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } int retval = __kmp_test_drdpa_lock(lck, gtid); if (retval) { lck->lk.owner_id = gtid + 1; } return retval; } int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { // Read the ticket value from the lock data struct, then the polls pointer and // the mask. The polls pointer must be read before the mask!!! 
(See above) kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load std::atomic *polls = lck->lk.polls; // atomic load kmp_uint64 mask = lck->lk.mask; // atomic load KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", ticket - 1, lck)); KMP_FSYNC_RELEASING(lck); ANNOTATE_DRDPA_RELEASED(lck); polls[ticket & mask] = ticket; // atomic store return KMP_LOCK_RELEASED; } static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_lock"; KMP_MB(); /* in case another processor initialized lock */ if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_drdpa_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && (__kmp_get_drdpa_lock_owner(lck) != gtid)) { KMP_FATAL(LockUnsettingSetByAnother, func); } lck->lk.owner_id = 0; return __kmp_release_drdpa_lock(lck, gtid); } void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { lck->lk.location = NULL; lck->lk.mask = 0; lck->lk.num_polls = 1; lck->lk.polls = (std::atomic *)__kmp_allocate( lck->lk.num_polls * sizeof(*(lck->lk.polls))); lck->lk.cleanup_ticket = 0; lck->lk.old_polls = NULL; lck->lk.next_ticket = 0; lck->lk.now_serving = 0; lck->lk.owner_id = 0; // no thread owns the lock. lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. lck->lk.initialized = lck; KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); } void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { lck->lk.initialized = NULL; lck->lk.location = NULL; if (lck->lk.polls.load() != NULL) { __kmp_free(lck->lk.polls.load()); lck->lk.polls = NULL; } if (lck->lk.old_polls != NULL) { __kmp_free(lck->lk.old_polls); lck->lk.old_polls = NULL; } lck->lk.mask = 0; lck->lk.num_polls = 0; lck->lk.cleanup_ticket = 0; lck->lk.next_ticket = 0; lck->lk.now_serving = 0; lck->lk.owner_id = 0; lck->lk.depth_locked = -1; } static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { char const *const func = "omp_destroy_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockNestableUsedAsSimple, func); } if (__kmp_get_drdpa_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_drdpa_lock(lck); } // nested drdpa ticket locks int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_drdpa_lock_owner(lck) == gtid) { lck->lk.depth_locked += 1; return KMP_LOCK_ACQUIRED_NEXT; } else { __kmp_acquire_drdpa_lock_timed_template(lck, gtid); ANNOTATE_DRDPA_ACQUIRED(lck); KMP_MB(); lck->lk.depth_locked = 1; KMP_MB(); lck->lk.owner_id = gtid + 1; return KMP_LOCK_ACQUIRED_FIRST; } } static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_set_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } __kmp_acquire_nested_drdpa_lock(lck, gtid); } int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { int retval; KMP_DEBUG_ASSERT(gtid >= 0); if (__kmp_get_drdpa_lock_owner(lck) == gtid) { retval = ++lck->lk.depth_locked; } else if (!__kmp_test_drdpa_lock(lck, gtid)) { retval = 0; } else { KMP_MB(); retval = lck->lk.depth_locked = 1; 
KMP_MB(); lck->lk.owner_id = gtid + 1; } return retval; } static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_test_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } return __kmp_test_nested_drdpa_lock(lck, gtid); } int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { KMP_DEBUG_ASSERT(gtid >= 0); KMP_MB(); if (--(lck->lk.depth_locked) == 0) { KMP_MB(); lck->lk.owner_id = 0; __kmp_release_drdpa_lock(lck, gtid); return KMP_LOCK_RELEASED; } return KMP_LOCK_STILL_HELD; } static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { char const *const func = "omp_unset_nest_lock"; KMP_MB(); /* in case another processor initialized lock */ if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_drdpa_lock_owner(lck) == -1) { KMP_FATAL(LockUnsettingFree, func); } if (__kmp_get_drdpa_lock_owner(lck) != gtid) { KMP_FATAL(LockUnsettingSetByAnother, func); } return __kmp_release_nested_drdpa_lock(lck, gtid); } void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { __kmp_init_drdpa_lock(lck); lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { __kmp_destroy_drdpa_lock(lck); lck->lk.depth_locked = 0; } static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { char const *const func = "omp_destroy_nest_lock"; if (lck->lk.initialized != lck) { KMP_FATAL(LockIsUninitialized, func); } if (!__kmp_is_drdpa_lock_nestable(lck)) { KMP_FATAL(LockSimpleUsedAsNestable, func); } if (__kmp_get_drdpa_lock_owner(lck) != -1) { KMP_FATAL(LockStillOwned, func); } __kmp_destroy_nested_drdpa_lock(lck); } // access functions to fields which don't exist for all lock kinds. static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { return lck->lk.location; } static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, const ident_t *loc) { lck->lk.location = loc; } static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { return lck->lk.flags; } static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags) { lck->lk.flags = flags; } // Time stamp counter #if KMP_ARCH_X86 || KMP_ARCH_X86_64 #define __kmp_tsc() __kmp_hardware_timestamp() // Runtime's default backoff parameters kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; #else // Use nanoseconds for other platforms extern kmp_uint64 __kmp_now_nsec(); kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; #define __kmp_tsc() __kmp_now_nsec() #endif // A useful predicate for dealing with timestamps that may wrap. // Is a before b? Since the timestamps may wrap, this is asking whether it's // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. // Times where going clockwise is less distance than going anti-clockwise // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), // then a > b (true) does not mean a reached b; whereas signed(a) = -2, // signed(b) = 0 captures the actual difference static inline bool before(kmp_uint64 a, kmp_uint64 b) { return ((kmp_int64)b - (kmp_int64)a) > 0; } // Truncated binary exponential backoff function void __kmp_spin_backoff(kmp_backoff_t *boff) { // We could flatten this loop, but making it a nested loop gives better result kmp_uint32 i; for (i = boff->step; i > 0; i--) { kmp_uint64 goal = __kmp_tsc() + boff->min_tick; do { KMP_CPU_PAUSE(); } while (before(__kmp_tsc(), goal)); } boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); } #if KMP_USE_DYNAMIC_LOCK // Direct lock initializers. It simply writes a tag to the low 8 bits of the // lock word. static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { TCW_4(*lck, KMP_GET_D_TAG(seq)); KA_TRACE( 20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); } #if KMP_USE_TSX // HLE lock functions - imported from the testbed runtime. #define HLE_ACQUIRE ".byte 0xf2;" #define HLE_RELEASE ".byte 0xf3;" static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); return v; } static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { // Use gtid for KMP_LOCK_BUSY if necessary if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { int delay = 1; do { while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { for (int i = delay; i != 0; --i) KMP_CPU_PAUSE(); delay = ((delay << 1) | 1) & 7; } } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); } } static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) { __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks } static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { __asm__ volatile(HLE_RELEASE "movl %1,%0" : "=m"(*lck) : "r"(KMP_LOCK_FREE(hle)) : "memory"); return KMP_LOCK_RELEASED; } static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) { return __kmp_release_hle_lock(lck, gtid); // TODO: add checks } static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); } static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) { return __kmp_test_hle_lock(lck, gtid); // TODO: add checks } static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { __kmp_init_queuing_lock(lck); } static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { __kmp_destroy_queuing_lock(lck); } static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) { __kmp_destroy_queuing_lock_with_checks(lck); } static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { unsigned retries = 3, status; do { status = _xbegin(); if (status == _XBEGIN_STARTED) { if (__kmp_is_unlocked_queuing_lock(lck)) return; _xabort(0xff); } if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { // Wait until lock becomes free while (!__kmp_is_unlocked_queuing_lock(lck)) { KMP_YIELD(TRUE); } } else if (!(status & _XABORT_RETRY)) break; } while (retries--); // Fall-back non-speculative lock (xchg) __kmp_acquire_queuing_lock(lck, gtid); } static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 
kmp_int32 gtid) { __kmp_acquire_rtm_lock(lck, gtid); } static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { if (__kmp_is_unlocked_queuing_lock(lck)) { // Releasing from speculation _xend(); } else { // Releasing from a real lock __kmp_release_queuing_lock(lck, gtid); } return KMP_LOCK_RELEASED; } static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { return __kmp_release_rtm_lock(lck, gtid); } static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { unsigned retries = 3, status; do { status = _xbegin(); if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { return 1; } if (!(status & _XABORT_RETRY)) break; } while (retries--); return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; } static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) { return __kmp_test_rtm_lock(lck, gtid); } #endif // KMP_USE_TSX // Entry functions for indirect locks (first element of direct lock jump tables) static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, kmp_dyna_lockseq_t tag); static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32); // Lock function definitions for the union parameter type #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) #define expand1(lk, op) \ static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \ __kmp_##op##_##lk##_##lock(&lock->lk); \ } #define expand2(lk, op) \ static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \ kmp_int32 gtid) { \ return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \ } #define expand3(lk, op) \ static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \ kmp_lock_flags_t flags) { \ __kmp_set_##lk##_lock_flags(&lock->lk, flags); \ } #define expand4(lk, op) \ static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \ const ident_t *loc) { \ __kmp_set_##lk##_lock_location(&lock->lk, loc); \ } KMP_FOREACH_LOCK_KIND(expand1, init) KMP_FOREACH_LOCK_KIND(expand1, init_nested) KMP_FOREACH_LOCK_KIND(expand1, destroy) KMP_FOREACH_LOCK_KIND(expand1, destroy_nested) KMP_FOREACH_LOCK_KIND(expand2, acquire) KMP_FOREACH_LOCK_KIND(expand2, acquire_nested) KMP_FOREACH_LOCK_KIND(expand2, release) KMP_FOREACH_LOCK_KIND(expand2, release_nested) KMP_FOREACH_LOCK_KIND(expand2, test) KMP_FOREACH_LOCK_KIND(expand2, test_nested) KMP_FOREACH_LOCK_KIND(expand3, ) KMP_FOREACH_LOCK_KIND(expand4, ) #undef expand1 #undef expand2 #undef expand3 #undef expand4 // Jump tables for the indirect lock functions // Only fill in the odd entries, that avoids the need to shift out the low bit // init functions #define expand(l, op) 0, __kmp_init_direct_lock, void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; #undef expand // destroy functions #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, static void (*direct_destroy[])(kmp_dyna_lock_t *) = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; #undef expand #define expand(l, op) \ 0, 
(void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks, static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; #undef expand // set/acquire functions #define expand(l, op) \ 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand // unset/release and test functions #define expand(l, op) \ 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; #undef expand #define expand(l, op) \ 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release)}; static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; #undef expand // Exposes only one set of jump tables (*lock or *lock_with_checks). void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0; int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0; int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0; int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0; // Jump tables for the indirect lock functions #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init)}; #undef expand #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, static void (*indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy)}; #undef expand #define expand(l, op) \ (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks, static void (*indirect_destroy_check[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy)}; #undef expand // set/acquire functions #define expand(l, op) \ (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, static int (*indirect_set[])(kmp_user_lock_p, kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand // unset/release and test functions #define expand(l, op) \ (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release)}; static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; #undef expand #define expand(l, op) \ (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release)}; static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test)}; 
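// --------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the tag-indexed dispatch that
// the direct/indirect jump tables above implement, reduced to a toy. The
// demo_* names are hypothetical. A "direct" lock keeps a small odd tag in the
// low bits of the lock word, so only the odd slots of the table hold real
// functions and slot 0 is reserved for the indirect-lock entry point; indexing
// by the raw tag then needs no shift to drop the low bit.
typedef unsigned demo_dyna_lock_t;

static int demo_set_indirect(demo_dyna_lock_t *lk, int gtid) {
  (void)lk; (void)gtid; return 0; // would look the lock up in a side table
}
static int demo_set_tas(demo_dyna_lock_t *lk, int gtid) {
  (void)lk; (void)gtid; return 0; // test-and-set acquire would go here
}
static int demo_set_ticket(demo_dyna_lock_t *lk, int gtid) {
  (void)lk; (void)gtid; return 0; // ticket-lock acquire would go here
}

// Slot 0: indirect locks; odd slots: direct lock kinds (tags 1 and 3 here).
static int (*demo_direct_set_table[])(demo_dyna_lock_t *, int) = {
    demo_set_indirect, demo_set_tas, 0, demo_set_ticket};

static int demo_set_lock(demo_dyna_lock_t *lk, int gtid) {
  unsigned tag = *lk & 0xffu;           // low byte carries the tag (assumption)
  unsigned idx = (tag & 1u) ? tag : 0u; // even/zero word means indirect path
  return demo_direct_set_table[idx](lk, gtid);
}
// --------------------------------------------------------------------------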
#undef expand // Exposes only one jump tables (*lock or *lock_with_checks). void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0; int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0; int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0; int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0; // Lock index table. kmp_indirect_lock_table_t __kmp_i_lock_table; // Size of indirect locks. static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; // Jump tables for lock accessor/modifier. void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = {0}; void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = {0}; const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( kmp_user_lock_p) = {0}; kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( kmp_user_lock_p) = {0}; // Use different lock pools for different lock types. static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; // User lock allocator for dynamically dispatched indirect locks. Every entry of // the indirect lock table holds the address and type of the allocated indirect // lock (kmp_indirect_lock_t), and the size of the table doubles when it is // full. A destroyed indirect lock object is returned to the reusable pool of // locks, unique to each lock type. kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) { kmp_indirect_lock_t *lck; kmp_lock_index_t idx; __kmp_acquire_lock(&__kmp_global_lock, gtid); if (__kmp_indirect_lock_pool[tag] != NULL) { // Reuse the allocated and destroyed lock object lck = __kmp_indirect_lock_pool[tag]; if (OMP_LOCK_T_SIZE < sizeof(void *)) idx = lck->lock->pool.index; __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); } else { idx = __kmp_i_lock_table.next; // Check capacity and double the size if it is full if (idx == __kmp_i_lock_table.size) { // Double up the space for block pointers int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( 2 * row * sizeof(kmp_indirect_lock_t *)); KMP_MEMCPY(new_table, __kmp_i_lock_table.table, row * sizeof(kmp_indirect_lock_t *)); kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; __kmp_i_lock_table.table = new_table; __kmp_free(old_table); // Allocate new objects in the new blocks for (int i = row; i < 2 * row; ++i) *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); __kmp_i_lock_table.size = 2 * idx; } __kmp_i_lock_table.next++; lck = KMP_GET_I_LOCK(idx); // Allocate a new base lock object lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); } __kmp_release_lock(&__kmp_global_lock, gtid); lck->type = tag; if (OMP_LOCK_T_SIZE < sizeof(void *)) { *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even } else { *((kmp_indirect_lock_t **)user_lock) = lck; } return lck; } // User lock lookup for dynamically dispatched locks. 
static __forceinline kmp_indirect_lock_t * __kmp_lookup_indirect_lock(void **user_lock, const char *func) { if (__kmp_env_consistency_check) { kmp_indirect_lock_t *lck = NULL; if (user_lock == NULL) { KMP_FATAL(LockIsUninitialized, func); } if (OMP_LOCK_T_SIZE < sizeof(void *)) { kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); if (idx >= __kmp_i_lock_table.size) { KMP_FATAL(LockIsUninitialized, func); } lck = KMP_GET_I_LOCK(idx); } else { lck = *((kmp_indirect_lock_t **)user_lock); } if (lck == NULL) { KMP_FATAL(LockIsUninitialized, func); } return lck; } else { if (OMP_LOCK_T_SIZE < sizeof(void *)) { return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); } else { return *((kmp_indirect_lock_t **)user_lock); } } } static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, kmp_dyna_lockseq_t seq) { #if KMP_USE_ADAPTIVE_LOCKS if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); seq = lockseq_queuing; } #endif #if KMP_USE_TSX if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { seq = lockseq_queuing; } #endif kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); KMP_I_LOCK_FUNC(l, init)(l->lock); KA_TRACE( 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); } static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { kmp_uint32 gtid = __kmp_entry_gtid(); kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); KMP_I_LOCK_FUNC(l, destroy)(l->lock); kmp_indirect_locktag_t tag = l->type; __kmp_acquire_lock(&__kmp_global_lock, gtid); // Use the base lock's space to keep the pool chain. l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; if (OMP_LOCK_T_SIZE < sizeof(void *)) { l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); } __kmp_indirect_lock_pool[tag] = l; __kmp_release_lock(&__kmp_global_lock, gtid); } static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); } static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); } static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; // This is used only in kmp_error.cpp when consistency checking is on. 
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { switch (seq) { case lockseq_tas: case lockseq_nested_tas: return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); #if KMP_USE_FUTEX case lockseq_futex: case lockseq_nested_futex: return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); #endif case lockseq_ticket: case lockseq_nested_ticket: return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); case lockseq_queuing: case lockseq_nested_queuing: #if KMP_USE_ADAPTIVE_LOCKS case lockseq_adaptive: #endif return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); case lockseq_drdpa: case lockseq_nested_drdpa: return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); default: return 0; } } // Initializes data for dynamic user locks. void __kmp_init_dynamic_user_locks() { // Initialize jump table for the lock functions if (__kmp_env_consistency_check) { __kmp_direct_set = direct_set_check; __kmp_direct_unset = direct_unset_check; __kmp_direct_test = direct_test_check; __kmp_direct_destroy = direct_destroy_check; __kmp_indirect_set = indirect_set_check; __kmp_indirect_unset = indirect_unset_check; __kmp_indirect_test = indirect_test_check; __kmp_indirect_destroy = indirect_destroy_check; } else { __kmp_direct_set = direct_set; __kmp_direct_unset = direct_unset; __kmp_direct_test = direct_test; __kmp_direct_destroy = direct_destroy; __kmp_indirect_set = indirect_set; __kmp_indirect_unset = indirect_unset; __kmp_indirect_test = indirect_test; __kmp_indirect_destroy = indirect_destroy; } // If the user locks have already been initialized, then return. Allow the // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate // new lock tables if they have already been allocated. if (__kmp_init_user_locks) return; // Initialize lock index table __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); __kmp_i_lock_table.next = 0; // Indirect lock size __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); #if KMP_USE_ADAPTIVE_LOCKS __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); #endif __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); #if KMP_USE_TSX __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); #endif __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); #if KMP_USE_FUTEX __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); #endif __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); // Initialize lock accessor/modifier #define fill_jumps(table, expand, sep) \ { \ table[locktag##sep##ticket] = expand(ticket); \ table[locktag##sep##queuing] = expand(queuing); \ table[locktag##sep##drdpa] = expand(drdpa); \ } #if KMP_USE_ADAPTIVE_LOCKS #define fill_table(table, expand) \ { \ fill_jumps(table, expand, _); \ table[locktag_adaptive] = expand(queuing); \ fill_jumps(table, expand, _nested_); \ } #else #define fill_table(table, expand) \ { \ fill_jumps(table, expand, _); \ fill_jumps(table, expand, _nested_); \ } #endif // KMP_USE_ADAPTIVE_LOCKS #define expand(l) \ (void (*)(kmp_user_lock_p, 
const ident_t *)) __kmp_set_##l##_lock_location fill_table(__kmp_indirect_set_location, expand); #undef expand #define expand(l) \ (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags fill_table(__kmp_indirect_set_flags, expand); #undef expand #define expand(l) \ (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location fill_table(__kmp_indirect_get_location, expand); #undef expand #define expand(l) \ (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags fill_table(__kmp_indirect_get_flags, expand); #undef expand __kmp_init_user_locks = TRUE; } // Clean up the lock table. void __kmp_cleanup_indirect_user_locks() { kmp_lock_index_t i; int k; // Clean up locks in the pools first (they were already destroyed before going // into the pools). for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; while (l != NULL) { kmp_indirect_lock_t *ll = l; l = (kmp_indirect_lock_t *)l->lock->pool.next; KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); __kmp_free(ll->lock); ll->lock = NULL; } __kmp_indirect_lock_pool[k] = NULL; } // Clean up the remaining undestroyed locks. for (i = 0; i < __kmp_i_lock_table.next; i++) { kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); if (l->lock != NULL) { // Locks not destroyed explicitly need to be destroyed here. KMP_I_LOCK_FUNC(l, destroy)(l->lock); KA_TRACE( 20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l)); __kmp_free(l->lock); } } // Free the table for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) __kmp_free(__kmp_i_lock_table.table[i]); __kmp_free(__kmp_i_lock_table.table); __kmp_init_user_locks = FALSE; } enum kmp_lock_kind __kmp_user_lock_kind = lk_default; int __kmp_num_locks_in_block = 1; // FIXME - tune this value #else // KMP_USE_DYNAMIC_LOCK static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { __kmp_init_tas_lock(lck); } static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { __kmp_init_nested_tas_lock(lck); } #if KMP_USE_FUTEX static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { __kmp_init_futex_lock(lck); } static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { __kmp_init_nested_futex_lock(lck); } #endif static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { return lck == lck->lk.self; } static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { __kmp_init_ticket_lock(lck); } static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { __kmp_init_nested_ticket_lock(lck); } static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { return lck == lck->lk.initialized; } static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { __kmp_init_queuing_lock(lck); } static void __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { __kmp_init_nested_queuing_lock(lck); } #if KMP_USE_ADAPTIVE_LOCKS static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { __kmp_init_adaptive_lock(lck); } #endif static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { return lck == lck->lk.initialized; } static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { __kmp_init_drdpa_lock(lck); } static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { __kmp_init_nested_drdpa_lock(lck); } /* user locks * They are implemented as a table of function pointers which are set to the * lock functions of the appropriate kind, 
once that has been determined. */ enum kmp_lock_kind __kmp_user_lock_kind = lk_default; size_t __kmp_base_user_lock_size = 0; size_t __kmp_user_lock_size = 0; kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL; int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL; void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid) = NULL; void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL; const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL; void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, const ident_t *loc) = NULL; kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL; void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, kmp_lock_flags_t flags) = NULL; void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) { switch (user_lock_kind) { case lk_default: default: KMP_ASSERT(0); case lk_tas: { __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t); __kmp_user_lock_size = sizeof(kmp_tas_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(tas); KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); } else { KMP_BIND_USER_LOCK(tas); KMP_BIND_NESTED_USER_LOCK(tas); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock); __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; __kmp_set_user_lock_location_ = (void (*)(kmp_user_lock_p, const ident_t *))NULL; __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; } break; #if KMP_USE_FUTEX case lk_futex: { __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t); __kmp_user_lock_size = sizeof(kmp_futex_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(futex); KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); } else { KMP_BIND_USER_LOCK(futex); KMP_BIND_NESTED_USER_LOCK(futex); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock); __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; __kmp_set_user_lock_location_ = (void (*)(kmp_user_lock_p, const ident_t *))NULL; __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; } break; #endif // KMP_USE_FUTEX case lk_ticket: { __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t); __kmp_user_lock_size = 
sizeof(kmp_ticket_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); } else { KMP_BIND_USER_LOCK(ticket); KMP_BIND_NESTED_USER_LOCK(ticket); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock); __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized); __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location); __kmp_set_user_lock_location_ = (void (*)( kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location); __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags); __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( &__kmp_set_ticket_lock_flags); } break; case lk_queuing: { __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t); __kmp_user_lock_size = sizeof(kmp_queuing_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); } else { KMP_BIND_USER_LOCK(queuing); KMP_BIND_NESTED_USER_LOCK(queuing); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock); __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); __kmp_set_user_lock_location_ = (void (*)( kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( &__kmp_set_queuing_lock_flags); } break; #if KMP_USE_ADAPTIVE_LOCKS case lk_adaptive: { __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t); __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); } else { KMP_BIND_USER_LOCK(adaptive); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock); __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); __kmp_set_user_lock_location_ = (void (*)( kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( &__kmp_set_queuing_lock_flags); } break; #endif // KMP_USE_ADAPTIVE_LOCKS case lk_drdpa: { __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t); __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t); __kmp_get_user_lock_owner_ = (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner); if (__kmp_env_consistency_check) { KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); } else { KMP_BIND_USER_LOCK(drdpa); KMP_BIND_NESTED_USER_LOCK(drdpa); } __kmp_destroy_user_lock_ = (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock); __kmp_is_user_lock_initialized_ = (int 
(*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized); __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location); __kmp_set_user_lock_location_ = (void (*)( kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location); __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags); __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( &__kmp_set_drdpa_lock_flags); } break; } } // ---------------------------------------------------------------------------- // User lock table & lock allocation kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL}; kmp_user_lock_p __kmp_lock_pool = NULL; // Lock block-allocation support. kmp_block_of_locks *__kmp_lock_blocks = NULL; int __kmp_num_locks_in_block = 1; // FIXME - tune this value static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) { // Assume that kmp_global_lock is held upon entry/exit. kmp_lock_index_t index; if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) { kmp_lock_index_t size; kmp_user_lock_p *table; // Reallocate lock table. if (__kmp_user_lock_table.allocated == 0) { size = 1024; } else { size = __kmp_user_lock_table.allocated * 2; } table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size); KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1, sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1)); table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table; // We cannot free the previous table now, since it may be in use by other // threads. So save the pointer to the previous table in in the first // element of the new table. All the tables will be organized into a list, // and could be freed when library shutting down. __kmp_user_lock_table.table = table; __kmp_user_lock_table.allocated = size; } KMP_DEBUG_ASSERT(__kmp_user_lock_table.used < __kmp_user_lock_table.allocated); index = __kmp_user_lock_table.used; __kmp_user_lock_table.table[index] = lck; ++__kmp_user_lock_table.used; return index; } static kmp_user_lock_p __kmp_lock_block_allocate() { // Assume that kmp_global_lock is held upon entry/exit. static int last_index = 0; if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) { // Restart the index. last_index = 0; // Need to allocate a new block. KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; char *buffer = (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks)); // Set up the new block. kmp_block_of_locks *new_block = (kmp_block_of_locks *)(&buffer[space_for_locks]); new_block->next_block = __kmp_lock_blocks; new_block->locks = (void *)buffer; // Publish the new block. KMP_MB(); __kmp_lock_blocks = new_block; } kmp_user_lock_p ret = (kmp_user_lock_p)(&( ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size])); last_index++; return ret; } // Get memory for a lock. It may be freshly allocated memory or reused memory // from lock pool. kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags) { kmp_user_lock_p lck; kmp_lock_index_t index; KMP_DEBUG_ASSERT(user_lock); __kmp_acquire_lock(&__kmp_global_lock, gtid); if (__kmp_lock_pool == NULL) { // Lock pool is empty. Allocate new memory. // ANNOTATION: Found no good way to express the syncronisation // between allocation and usage, so ignore the allocation ANNOTATE_IGNORE_WRITES_BEGIN(); if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point. 
lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size); } else { lck = __kmp_lock_block_allocate(); } ANNOTATE_IGNORE_WRITES_END(); // Insert lock in the table so that it can be freed in __kmp_cleanup, // and debugger has info on all allocated locks. index = __kmp_lock_table_insert(lck); } else { // Pick up lock from pool. lck = __kmp_lock_pool; index = __kmp_lock_pool->pool.index; __kmp_lock_pool = __kmp_lock_pool->pool.next; } // We could potentially differentiate between nested and regular locks // here, and do the lock table lookup for regular locks only. if (OMP_LOCK_T_SIZE < sizeof(void *)) { *((kmp_lock_index_t *)user_lock) = index; } else { *((kmp_user_lock_p *)user_lock) = lck; } // mark the lock if it is critical section lock. __kmp_set_user_lock_flags(lck, flags); __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper return lck; } // Put lock's memory to pool for reusing. void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck) { KMP_DEBUG_ASSERT(user_lock != NULL); KMP_DEBUG_ASSERT(lck != NULL); __kmp_acquire_lock(&__kmp_global_lock, gtid); lck->pool.next = __kmp_lock_pool; __kmp_lock_pool = lck; if (OMP_LOCK_T_SIZE < sizeof(void *)) { kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used); lck->pool.index = index; } __kmp_release_lock(&__kmp_global_lock, gtid); } kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) { kmp_user_lock_p lck = NULL; if (__kmp_env_consistency_check) { if (user_lock == NULL) { KMP_FATAL(LockIsUninitialized, func); } } if (OMP_LOCK_T_SIZE < sizeof(void *)) { kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); if (__kmp_env_consistency_check) { if (!(0 < index && index < __kmp_user_lock_table.used)) { KMP_FATAL(LockIsUninitialized, func); } } KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used); KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); lck = __kmp_user_lock_table.table[index]; } else { lck = *((kmp_user_lock_p *)user_lock); } if (__kmp_env_consistency_check) { if (lck == NULL) { KMP_FATAL(LockIsUninitialized, func); } } return lck; } void __kmp_cleanup_user_locks(void) { // Reset lock pool. Don't worry about lock in the pool--we will free them when // iterating through lock table (it includes all the locks, dead or alive). __kmp_lock_pool = NULL; #define IS_CRITICAL(lck) \ ((__kmp_get_user_lock_flags_ != NULL) && \ ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section)) // Loop through lock table, free all locks. // Do not free item [0], it is reserved for lock tables list. // // FIXME - we are iterating through a list of (pointers to) objects of type // union kmp_user_lock, but we have no way of knowing whether the base type is // currently "pool" or whatever the global user lock type is. // // We are relying on the fact that for all of the user lock types // (except "tas"), the first field in the lock struct is the "initialized" // field, which is set to the address of the lock object itself when // the lock is initialized. When the union is of type "pool", the // first field is a pointer to the next object in the free list, which // will not be the same address as the object itself. // // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail // for "pool" objects on the free list. This must happen as the "location" // field of real user locks overlaps the "index" field of "pool" objects. 
// // It would be better to run through the free list, and remove all "pool" // objects from the lock table before executing this loop. However, // "pool" objects do not always have their index field set (only on // lin_32e), and I don't want to search the lock table for the address // of every "pool" object on the free list. while (__kmp_user_lock_table.used > 1) { const ident *loc; // reduce __kmp_user_lock_table.used before freeing the lock, // so that state of locks is consistent kmp_user_lock_p lck = __kmp_user_lock_table.table[--__kmp_user_lock_table.used]; if ((__kmp_is_user_lock_initialized_ != NULL) && (*__kmp_is_user_lock_initialized_)(lck)) { // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND // it is NOT a critical section (user is not responsible for destroying // criticals) AND we know source location to report. if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) && ((loc = __kmp_get_user_lock_location(lck)) != NULL) && (loc->psource != NULL)) { kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0); KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line); __kmp_str_loc_free(&str_loc); } #ifdef KMP_DEBUG if (IS_CRITICAL(lck)) { KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void **)lck)); } else { KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void **)lck)); } #endif // KMP_DEBUG // Cleanup internal lock dynamic resources (for drdpa locks particularly). __kmp_destroy_user_lock(lck); } // Free the lock if block allocation of locks is not used. if (__kmp_lock_blocks == NULL) { __kmp_free(lck); } } #undef IS_CRITICAL // delete lock table(s). kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; __kmp_user_lock_table.table = NULL; __kmp_user_lock_table.allocated = 0; while (table_ptr != NULL) { // In the first element we saved the pointer to the previous // (smaller) lock table. kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]); __kmp_free(table_ptr); table_ptr = next; } // Free buffers allocated for blocks of locks. kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; __kmp_lock_blocks = NULL; while (block_ptr != NULL) { kmp_block_of_locks_t *next = block_ptr->next_block; __kmp_free(block_ptr->locks); // *block_ptr itself was allocated at the end of the locks vector. 
    block_ptr = next;
  }
  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index 948bc9471181..7e28d96de683 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,14 +1,14 @@
// $FreeBSD$
-#define LLVM_REVISION "llvmorg-11.0.0-rc1-25-g903c872b169"
+#define LLVM_REVISION "llvmorg-11.0.0-rc1-47-gff47911ddfc"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-11.0.0-rc1-25-g903c872b169"
+#define CLANG_REVISION "llvmorg-11.0.0-rc1-47-gff47911ddfc"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// -
-#define LLD_REVISION "llvmorg-11.0.0-rc1-25-g903c872b169-1200012"
+#define LLD_REVISION "llvmorg-11.0.0-rc1-47-gff47911ddfc-1200012"
#define LLD_REPOSITORY "FreeBSD"
-#define LLDB_REVISION "llvmorg-11.0.0-rc1-25-g903c872b169"
+#define LLDB_REVISION "llvmorg-11.0.0-rc1-47-gff47911ddfc"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 2dcb3becba66..28cef1ec5e77 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-11.0.0-rc1-25-g903c872b169"
+#define LLVM_REVISION "llvmorg-11.0.0-rc1-47-gff47911ddfc"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
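// --------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the intrusive free-list reuse
// that __kmp_user_lock_allocate / __kmp_user_lock_free implement in the
// kmp_lock.cpp hunk above, reduced to a toy pool. The demo_* names are
// hypothetical; the real code also records a lock-table index and serializes
// every pool operation on __kmp_global_lock, which is omitted here.
#include <cstdlib>

struct demo_user_lock {
  demo_user_lock *pool_next; // overlays lock state while the lock is pooled
};

static demo_user_lock *demo_lock_pool = nullptr; // caller serializes access (assumption)

static demo_user_lock *demo_lock_allocate() {
  if (demo_lock_pool != nullptr) {
    demo_user_lock *lck = demo_lock_pool; // reuse a previously freed lock
    demo_lock_pool = lck->pool_next;
    return lck;
  }
  // Pool empty: hand out fresh, zeroed memory (stands in for __kmp_allocate).
  return static_cast<demo_user_lock *>(std::calloc(1, sizeof(demo_user_lock)));
}

static void demo_lock_free(demo_user_lock *lck) {
  lck->pool_next = demo_lock_pool; // push onto the intrusive free list
  demo_lock_pool = lck;
}
// --------------------------------------------------------------------------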