Index: head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
===================================================================
--- head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S (revision 322854)
+++ head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S (revision 322855)
@@ -1,43 +1,52 @@
//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../assembly.h"
// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
// int result = __{eq,lt,le,ge,gt}df2(a, b);
// if (result {==,<,<=,>=,>} 0) {
// return 1;
// } else {
// return 0;
// }
// }
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \
+ vmov d0, r0, r1 SEPARATOR \
+ vmov d1, r2, r3
+#else
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
#define DEFINE_AEABI_DCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
DEFINE_AEABI_DCMP(eq)
DEFINE_AEABI_DCMP(lt)
DEFINE_AEABI_DCMP(le)
DEFINE_AEABI_DCMP(ge)
DEFINE_AEABI_DCMP(gt)
NO_EXEC_STACK_DIRECTIVE
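
For context: on a hard-float target the __aeabi_dcmp* entry points still receive their operands in core registers (r0:r1 and r2:r3) per the ARM RTABI, while __{eq,lt,le,ge,gt}df2 built for that ABI expect them in d0/d1; the added vmov pair bridges the two conventions. A minimal C++ model of what each generated stub computes, under the assumption that __eqdf2 follows the usual compiler-rt/libgcc convention of returning 0 when its operands compare equal (the register marshaling itself is invisible at this level):

// Semantic model only: the register moves that motivated this change live
// below the C++ level. __eqdf2 is the comparison helper provided by
// compiler-rt (or libgcc); by convention it returns 0 when a == b.
extern "C" int __eqdf2(double, double);

extern "C" int aeabi_dcmpeq_model(double a, double b) {
  return __eqdf2(a, b) == 0 ? 1 : 0;   // 1 = condition holds, 0 = it does not
}

int main() { return aeabi_dcmpeq_model(1.0, 1.0) == 1 ? 0 : 1; }

The fcmp change in the next file is the same pattern for single precision, using vmov s0, r0 and vmov s1, r1.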
Index: head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
===================================================================
--- head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S (revision 322854)
+++ head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S (revision 322855)
@@ -1,43 +1,52 @@
//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../assembly.h"
// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
// int result = __{eq,lt,le,ge,gt}sf2(a, b);
// if (result {==,<,<=,>=,>} 0) {
// return 1;
// } else {
// return 0;
// }
// }
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \
+ vmov s0, r0 SEPARATOR \
+ vmov s1, r1
+#else
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
#define DEFINE_AEABI_FCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
DEFINE_AEABI_FCMP(eq)
DEFINE_AEABI_FCMP(lt)
DEFINE_AEABI_FCMP(le)
DEFINE_AEABI_FCMP(ge)
DEFINE_AEABI_FCMP(gt)
NO_EXEC_STACK_DIRECTIVE
Index: head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp
===================================================================
--- head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp (revision 322854)
+++ head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp (revision 322855)
@@ -1,177 +1,177 @@
//===-- esan_sideline_linux.cpp ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners.
//
// Support for a separate or "sideline" tool thread on Linux.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_LINUX
#include "esan_sideline.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_linux.h"
#include <errno.h>
#include <sched.h>
#include <sys/prctl.h>
#include <sys/signal.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
namespace __esan {
static const int SigAltStackSize = 4*1024;
static const int SidelineStackSize = 4*1024;
static const uptr SidelineIdUninitialized = 1;
// FIXME: we'll need some kind of TLS (can we trust that a pthread key will
// work in our non-POSIX thread?) to access our data in our signal handler
// with multiple sideline threads. For now we assume there is only one
// sideline thread and we use a dirty solution of a global var.
static SidelineThread *TheThread;
// We aren't passing SA_NODEFER so the same signal is blocked while here.
void SidelineThread::handleSidelineSignal(int SigNum, void *SigInfo,
void *Ctx) {
VPrintf(3, "Sideline signal %d\n", SigNum);
CHECK_EQ(SigNum, SIGALRM);
// See above about needing TLS to avoid this global var.
SidelineThread *Thread = TheThread;
if (atomic_load(&Thread->SidelineExit, memory_order_relaxed) != 0)
return;
Thread->sampleFunc(Thread->FuncArg);
}
void SidelineThread::registerSignal(int SigNum) {
__sanitizer_sigaction SigAct;
internal_memset(&SigAct, 0, sizeof(SigAct));
SigAct.sigaction = handleSidelineSignal;
// We do not pass SA_NODEFER as we want to block the same signal.
SigAct.sa_flags = SA_ONSTACK | SA_SIGINFO;
int Res = internal_sigaction(SigNum, &SigAct, nullptr);
CHECK_EQ(Res, 0);
}
int SidelineThread::runSideline(void *Arg) {
VPrintf(1, "Sideline thread starting\n");
SidelineThread *Thread = static_cast<SidelineThread*>(Arg);
// If the parent dies, we want to exit also.
internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
// Set up a signal handler on an alternate stack for safety.
InternalScopedBuffer<char> StackMap(SigAltStackSize);
-  struct sigaltstack SigAltStack;
+  stack_t SigAltStack;
SigAltStack.ss_sp = StackMap.data();
SigAltStack.ss_size = SigAltStackSize;
SigAltStack.ss_flags = 0;
internal_sigaltstack(&SigAltStack, nullptr);
// We inherit the signal mask from the app thread. In case
// we weren't created at init time, we ensure the mask is empty.
__sanitizer_sigset_t SigSet;
internal_sigfillset(&SigSet);
int Res = internal_sigprocmask(SIG_UNBLOCK, &SigSet, nullptr);
CHECK_EQ(Res, 0);
registerSignal(SIGALRM);
bool TimerSuccess = Thread->adjustTimer(Thread->Freq);
CHECK(TimerSuccess);
// We loop, doing nothing but handling itimer signals.
while (atomic_load(&TheThread->SidelineExit, memory_order_relaxed) == 0)
sched_yield();
if (!Thread->adjustTimer(0))
VPrintf(1, "Failed to disable timer\n");
VPrintf(1, "Sideline thread exiting\n");
return 0;
}
bool SidelineThread::launchThread(SidelineFunc takeSample, void *Arg,
u32 FreqMilliSec) {
// This can only be called once. However, we can't clear a field in
// the constructor and check for that here as the constructor for
// a static instance is called *after* our module_ctor and thus after
// this routine! Thus we rely on the TheThread check below.
CHECK(TheThread == nullptr); // Only one sideline thread is supported.
TheThread = this;
sampleFunc = takeSample;
FuncArg = Arg;
Freq = FreqMilliSec;
atomic_store(&SidelineExit, 0, memory_order_relaxed);
// We do without a guard page.
Stack = static_cast<char*>(MmapOrDie(SidelineStackSize, "SidelineStack"));
// We need to handle the return value from internal_clone() not having been
// assigned yet (for our CHECK in adjustTimer()) so we ensure this has a
// sentinel value.
SidelineId = SidelineIdUninitialized;
// By omitting CLONE_THREAD, the child is in its own thread group and will not
// receive any of the application's signals.
SidelineId = internal_clone(
runSideline, Stack + SidelineStackSize,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
this, nullptr /* parent_tidptr */,
nullptr /* newtls */, nullptr /* child_tidptr */);
int ErrCode;
if (internal_iserror(SidelineId, &ErrCode)) {
Printf("FATAL: EfficiencySanitizer failed to spawn a thread (code %d).\n",
ErrCode);
Die();
return false; // Not reached.
}
return true;
}
bool SidelineThread::joinThread() {
VPrintf(1, "Joining sideline thread\n");
bool Res = true;
atomic_store(&SidelineExit, 1, memory_order_relaxed);
while (true) {
uptr Status = internal_waitpid(SidelineId, nullptr, __WALL);
int ErrCode;
if (!internal_iserror(Status, &ErrCode))
break;
if (ErrCode == EINTR)
continue;
VPrintf(1, "Failed to join sideline thread (errno %d)\n", ErrCode);
Res = false;
break;
}
UnmapOrDie(Stack, SidelineStackSize);
return Res;
}
// Must be called from the sideline thread itself.
bool SidelineThread::adjustTimer(u32 FreqMilliSec) {
// The return value of internal_clone() may not have been assigned yet:
CHECK(internal_getpid() == SidelineId ||
SidelineId == SidelineIdUninitialized);
Freq = FreqMilliSec;
struct itimerval TimerVal;
TimerVal.it_interval.tv_sec = (time_t) Freq / 1000;
TimerVal.it_interval.tv_usec = (time_t) (Freq % 1000) * 1000;
TimerVal.it_value.tv_sec = (time_t) Freq / 1000;
TimerVal.it_value.tv_usec = (time_t) (Freq % 1000) * 1000;
// As we're in a different thread group, we cannot use either
// ITIMER_PROF or ITIMER_VIRTUAL without taking up scheduled
// time ourselves: thus we must use real time.
int Res = setitimer(ITIMER_REAL, &TimerVal, nullptr);
return (Res == 0);
}
} // namespace __esan
#endif // SANITIZER_LINUX
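
The one-line change above tracks glibc's removal of the legacy "struct sigaltstack" tag in favor of the POSIX stack_t typedef. A small self-contained sketch of the same pattern the sideline thread uses, with plain POSIX calls standing in for the sanitizer's internal_* wrappers (all names here are illustrative):

#include <csignal>
#include <sys/time.h>
#include <unistd.h>

static volatile sig_atomic_t Ticks = 0;
static void onAlarm(int) { ++Ticks; }   // sampling work would go here

int main() {
  static char AltStack[16 * 1024];
  stack_t SS = {};                 // POSIX spells the type stack_t; newer
  SS.ss_sp = AltStack;             // glibc dropped the struct tag entirely,
  SS.ss_size = sizeof AltStack;    // which is what this revision fixes.
  SS.ss_flags = 0;
  sigaltstack(&SS, nullptr);

  struct sigaction SA = {};
  SA.sa_handler = onAlarm;
  SA.sa_flags = SA_ONSTACK;        // deliver on the alternate stack
  sigaction(SIGALRM, &SA, nullptr);

  unsigned FreqMilliSec = 10;      // same ms -> sec/usec split as adjustTimer
  struct itimerval TV = {};
  TV.it_interval.tv_sec = FreqMilliSec / 1000;
  TV.it_interval.tv_usec = (FreqMilliSec % 1000) * 1000;
  TV.it_value = TV.it_interval;
  setitimer(ITIMER_REAL, &TV, nullptr);

  while (Ticks < 3)                // wait for a few timer signals
    pause();
  return 0;
}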
Index: head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c
===================================================================
--- head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c (revision 322854)
+++ head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c (revision 322855)
@@ -1,18 +1,18 @@
-//===- InstrProfilingNameVar.c - profile name variable setup --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
+/*===- InstrProfilingNameVar.c - profile name variable setup -------------===*\
+|*
+|* The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+\*===----------------------------------------------------------------------===*/
#include "InstrProfiling.h"
/* char __llvm_profile_filename[1]
*
* The runtime should only provide its own definition of this symbol when the
* user has not specified one. Set this up by moving the runtime's copy of this
* symbol to an object file within the archive.
*/
COMPILER_RT_WEAK char INSTR_PROF_PROFILE_NAME_VAR[1] = {0};
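
As the comment above explains, the weak definition only takes effect when nothing else defines the symbol. A hedged sketch of an override, assuming INSTR_PROF_PROFILE_NAME_VAR expands to __llvm_profile_filename as in InstrProfiling.h: a strong definition in user code wins at link time and this archive member is never pulled in (the initializer below is purely illustrative).

// Strong definition supplied by the application; it overrides the weak
// fallback above. "%p" is the process-id token the profile runtime
// understands in filename patterns.
extern "C" char __llvm_profile_filename[] = "my-app-%p.profraw";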
Index: head/contrib/compiler-rt
===================================================================
--- head/contrib/compiler-rt (revision 322854)
+++ head/contrib/compiler-rt (revision 322855)
Property changes on: head/contrib/compiler-rt
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/compiler-rt/dist:r322737-322850
Index: head/contrib/libc++
===================================================================
--- head/contrib/libc++ (revision 322854)
+++ head/contrib/libc++ (revision 322855)
Property changes on: head/contrib/libc++
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/libc++/dist:r322737-322850
Index: head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h (revision 322854)
+++ head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h (revision 322855)
@@ -1,2329 +1,2332 @@
//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the SDNode class and derived classes, which are used to
// represent the nodes and operations present in a SelectionDAG. These nodes
// and operations are machine code level operations, with some similarities to
// the GCC RTL representation.
//
// Clients should include the SelectionDAG.h file instead of this file directly.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
#define LLVM_CODEGEN_SELECTIONDAGNODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
namespace llvm {
class APInt;
class Constant;
template <typename T> struct DenseMapInfo;
class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
class MCSymbol;
class raw_ostream;
class SDNode;
class SelectionDAG;
class Type;
class Value;
void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
bool force = false);
/// This represents a list of ValueType's that has been intern'd by
/// a SelectionDAG. Instances of this simple value class are returned by
/// SelectionDAG::getVTList(...).
///
struct SDVTList {
const EVT *VTs;
unsigned int NumVTs;
};
namespace ISD {
/// Node predicates
/// If N is a BUILD_VECTOR node whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatValue.
- bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+ /// This sets \p SplatValue to the smallest possible splat unless AllowShrink
+ /// is set to false.
+ bool isConstantSplatVector(const SDNode *N, APInt &SplatValue,
+ bool AllowShrink = true);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are ~0 or undef.
bool isBuildVectorAllOnes(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are 0 or undef.
bool isBuildVectorAllZeros(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR node of all
/// ConstantSDNode or undef.
bool isBuildVectorOfConstantSDNodes(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR node of all
/// ConstantFPSDNode or undef.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
/// Return true if the node has at least one operand and all operands of the
/// specified node are ISD::UNDEF.
bool allOperandsUndef(const SDNode *N);
} // end namespace ISD
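  // The new AllowShrink parameter is easy to misread, so a short usage
  // fragment (not standalone; assumes an SDNode *N in the usual
  // DAG-combine context):
  //
  //   APInt SplatValue;
  //   bool IsSplat = ISD::isConstantSplatVector(N, SplatValue);
  //   // With the default AllowShrink = true, SplatValue may come back at
  //   // a narrower bit width than the vector element type when the splat
  //   // fits in fewer bits; pass false to get the full element width.
  //   bool IsSplatFullWidth =
  //       ISD::isConstantSplatVector(N, SplatValue, /*AllowShrink=*/false);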
//===----------------------------------------------------------------------===//
/// Unlike LLVM values, Selection DAG nodes may return multiple
/// values as the result of a computation. Many nodes return multiple values,
/// from loads (which define a token and a return value) to ADDC (which returns
/// a result and a carry value), to calls (which may return an arbitrary number
/// of values).
///
/// As such, each use of a SelectionDAG computation must indicate the node that
/// computes it as well as which return value to use from that node. This pair
/// of information is represented with the SDValue value type.
///
class SDValue {
friend struct DenseMapInfo<SDValue>;
SDNode *Node = nullptr; // The node defining the value we are using.
unsigned ResNo = 0; // Which return value of the node we are using.
public:
SDValue() = default;
SDValue(SDNode *node, unsigned resno);
/// get the index which selects a specific result in the SDNode
unsigned getResNo() const { return ResNo; }
/// get the SDNode which holds the desired result
SDNode *getNode() const { return Node; }
/// set the SDNode
void setNode(SDNode *N) { Node = N; }
inline SDNode *operator->() const { return Node; }
bool operator==(const SDValue &O) const {
return Node == O.Node && ResNo == O.ResNo;
}
bool operator!=(const SDValue &O) const {
return !operator==(O);
}
bool operator<(const SDValue &O) const {
return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
}
explicit operator bool() const {
return Node != nullptr;
}
SDValue getValue(unsigned R) const {
return SDValue(Node, R);
}
/// Return true if this node is an operand of N.
bool isOperandOf(const SDNode *N) const;
/// Return the ValueType of the referenced return value.
inline EVT getValueType() const;
/// Return the simple ValueType of the referenced return value.
MVT getSimpleValueType() const {
return getValueType().getSimpleVT();
}
/// Returns the size of the value in bits.
unsigned getValueSizeInBits() const {
return getValueType().getSizeInBits();
}
unsigned getScalarValueSizeInBits() const {
return getValueType().getScalarType().getSizeInBits();
}
// Forwarding methods - These forward to the corresponding methods in SDNode.
inline unsigned getOpcode() const;
inline unsigned getNumOperands() const;
inline const SDValue &getOperand(unsigned i) const;
inline uint64_t getConstantOperandVal(unsigned i) const;
inline bool isTargetMemoryOpcode() const;
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline bool isUndef() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
inline void dumpr() const;
/// Return true if this operand (which must be a chain) reaches the
/// specified operand without crossing any side-effecting instructions.
/// In practice, this looks through token factors and non-volatile loads.
/// In order to remain efficient, this only
/// looks a couple of nodes in; it does not do an exhaustive search.
bool reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth = 2) const;
/// Return true if there are no nodes using value ResNo of Node.
inline bool use_empty() const;
/// Return true if there is exactly one node using value ResNo of Node.
inline bool hasOneUse() const;
};
template<> struct DenseMapInfo<SDValue> {
static inline SDValue getEmptyKey() {
SDValue V;
V.ResNo = -1U;
return V;
}
static inline SDValue getTombstoneKey() {
SDValue V;
V.ResNo = -2U;
return V;
}
static unsigned getHashValue(const SDValue &Val) {
return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
(unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
}
static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
return LHS == RHS;
}
};
template <> struct isPodLike<SDValue> { static const bool value = true; };
/// Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDValue> {
using SimpleType = SDNode *;
static SimpleType getSimplifiedValue(SDValue &Val) {
return Val.getNode();
}
};
template<> struct simplify_type<const SDValue> {
using SimpleType = /*const*/ SDNode *;
static SimpleType getSimplifiedValue(const SDValue &Val) {
return Val.getNode();
}
};
/// Represents a use of a SDNode. This class holds an SDValue,
/// which records the SDNode being used and the result number, a
/// pointer to the SDNode using the value, and Next and Prev pointers,
/// which link together all the uses of an SDNode.
///
class SDUse {
/// Val - The value being used.
SDValue Val;
/// User - The user of this value.
SDNode *User = nullptr;
/// Prev, Next - Pointers to the uses list of the SDNode referred by
/// this operand.
SDUse **Prev = nullptr;
SDUse *Next = nullptr;
public:
SDUse() = default;
SDUse(const SDUse &U) = delete;
SDUse &operator=(const SDUse &) = delete;
/// Normally SDUse will just implicitly convert to an SDValue that it holds.
operator const SDValue&() const { return Val; }
/// If implicit conversion to SDValue doesn't work, the get() method returns
/// the SDValue.
const SDValue &get() const { return Val; }
/// This returns the SDNode that contains this Use.
SDNode *getUser() { return User; }
/// Get the next SDUse in the use list.
SDUse *getNext() const { return Next; }
/// Convenience function for get().getNode().
SDNode *getNode() const { return Val.getNode(); }
/// Convenience function for get().getResNo().
unsigned getResNo() const { return Val.getResNo(); }
/// Convenience function for get().getValueType().
EVT getValueType() const { return Val.getValueType(); }
/// Convenience function for get().operator==
bool operator==(const SDValue &V) const {
return Val == V;
}
/// Convenience function for get().operator!=
bool operator!=(const SDValue &V) const {
return Val != V;
}
/// Convenience function for get().operator<
bool operator<(const SDValue &V) const {
return Val < V;
}
private:
friend class SelectionDAG;
friend class SDNode;
// TODO: unfriend HandleSDNode once we fix its operand handling.
friend class HandleSDNode;
void setUser(SDNode *p) { User = p; }
/// Remove this use from its existing use list, assign it the
/// given value, and add it to the new value's node's use list.
inline void set(const SDValue &V);
/// Like set, but only supports initializing a newly-allocated
/// SDUse with a non-null value.
inline void setInitial(const SDValue &V);
/// Like set, but only sets the Node portion of the value,
/// leaving the ResNo portion unmodified.
inline void setNode(SDNode *N);
void addToList(SDUse **List) {
Next = *List;
if (Next) Next->Prev = &Next;
Prev = List;
*List = this;
}
void removeFromList() {
*Prev = Next;
if (Next) Next->Prev = Prev;
}
};
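
addToList and removeFromList above use the classic pointer-to-pointer trick: Prev stores the address of whichever pointer currently points at this use, so unlinking needs no special case for the list head. A self-contained model of just that technique (names invented for illustration):

#include <cassert>

struct Use {
  Use *Next = nullptr;
  Use **Prev = nullptr;            // address of the link pointing at us

  void addToList(Use **List) {
    Next = *List;
    if (Next) Next->Prev = &Next;
    Prev = List;
    *List = this;
  }
  void removeFromList() {
    *Prev = Next;                  // head and interior unlink identically
    if (Next) Next->Prev = Prev;
  }
};

int main() {
  Use *Head = nullptr;
  Use A, B;
  A.addToList(&Head);              // list: A
  B.addToList(&Head);              // list: B -> A
  B.removeFromList();              // list: A, no head special-casing
  assert(Head == &A && A.Next == nullptr);
  return 0;
}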
/// simplify_type specializations - Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDUse> {
using SimpleType = SDNode *;
static SimpleType getSimplifiedValue(SDUse &Val) {
return Val.getNode();
}
};
/// These are IR-level optimization flags that may be propagated to SDNodes.
/// TODO: This data structure should be shared by the IR optimizer and
/// the backend.
struct SDNodeFlags {
private:
// This bit is used to determine if the flags are in a defined state.
// Flag bits can only be masked out during intersection if the masking flags
// are defined.
bool AnyDefined : 1;
bool NoUnsignedWrap : 1;
bool NoSignedWrap : 1;
bool Exact : 1;
bool UnsafeAlgebra : 1;
bool NoNaNs : 1;
bool NoInfs : 1;
bool NoSignedZeros : 1;
bool AllowReciprocal : 1;
bool VectorReduction : 1;
bool AllowContract : 1;
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
: AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false) {}
/// Sets the state of the flags to the defined state.
void setDefined() { AnyDefined = true; }
/// Returns true if the flags are in a defined state.
bool isDefined() const { return AnyDefined; }
// These are mutators for each flag.
void setNoUnsignedWrap(bool b) {
setDefined();
NoUnsignedWrap = b;
}
void setNoSignedWrap(bool b) {
setDefined();
NoSignedWrap = b;
}
void setExact(bool b) {
setDefined();
Exact = b;
}
void setUnsafeAlgebra(bool b) {
setDefined();
UnsafeAlgebra = b;
}
void setNoNaNs(bool b) {
setDefined();
NoNaNs = b;
}
void setNoInfs(bool b) {
setDefined();
NoInfs = b;
}
void setNoSignedZeros(bool b) {
setDefined();
NoSignedZeros = b;
}
void setAllowReciprocal(bool b) {
setDefined();
AllowReciprocal = b;
}
void setVectorReduction(bool b) {
setDefined();
VectorReduction = b;
}
void setAllowContract(bool b) {
setDefined();
AllowContract = b;
}
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
bool hasNoSignedWrap() const { return NoSignedWrap; }
bool hasExact() const { return Exact; }
bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
bool hasNoNaNs() const { return NoNaNs; }
bool hasNoInfs() const { return NoInfs; }
bool hasNoSignedZeros() const { return NoSignedZeros; }
bool hasAllowReciprocal() const { return AllowReciprocal; }
bool hasVectorReduction() const { return VectorReduction; }
bool hasAllowContract() const { return AllowContract; }
/// Clear any flags in this flag set that aren't also set in Flags.
/// If the given Flags are undefined then don't do anything.
void intersectWith(const SDNodeFlags Flags) {
if (!Flags.isDefined())
return;
NoUnsignedWrap &= Flags.NoUnsignedWrap;
NoSignedWrap &= Flags.NoSignedWrap;
Exact &= Flags.Exact;
UnsafeAlgebra &= Flags.UnsafeAlgebra;
NoNaNs &= Flags.NoNaNs;
NoInfs &= Flags.NoInfs;
NoSignedZeros &= Flags.NoSignedZeros;
AllowReciprocal &= Flags.AllowReciprocal;
VectorReduction &= Flags.VectorReduction;
AllowContract &= Flags.AllowContract;
}
};
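// A short usage sketch of the intersection semantics, against the class
// above (the scenario is invented): flags record guarantees, so a
// combined node may keep only what both inputs guaranteed, and an
// undefined right-hand side means "no information, change nothing".
//
//   SDNodeFlags A, B;
//   A.setNoUnsignedWrap(true);   // A promises no unsigned wrap
//   B.setNoUnsignedWrap(false);  // B is defined but makes no such promise
//   A.intersectWith(B);          // A.hasNoUnsignedWrap() is now false
//
//   SDNodeFlags C;               // default-constructed: not defined
//   A.setExact(true);
//   A.intersectWith(C);          // no effect: C carries no information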
/// Represents one node in the SelectionDAG.
///
class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
/// The operation that this node performs.
int16_t NodeType;
protected:
// We define a set of mini-helper classes to help us interpret the bits in our
// SubclassData. These are designed to fit within a uint16_t so they pack
// with NodeType.
class SDNodeBitfields {
friend class SDNode;
friend class MemIntrinsicSDNode;
friend class MemSDNode;
uint16_t HasDebugValue : 1;
uint16_t IsMemIntrinsic : 1;
};
enum { NumSDNodeBits = 2 };
class ConstantSDNodeBitfields {
friend class ConstantSDNode;
uint16_t : NumSDNodeBits;
uint16_t IsOpaque : 1;
};
class MemSDNodeBitfields {
friend class MemSDNode;
friend class MemIntrinsicSDNode;
friend class AtomicSDNode;
uint16_t : NumSDNodeBits;
uint16_t IsVolatile : 1;
uint16_t IsNonTemporal : 1;
uint16_t IsDereferenceable : 1;
uint16_t IsInvariant : 1;
};
enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
uint16_t : NumMemSDNodeBits;
uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode
};
enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
class LoadSDNodeBitfields {
friend class LoadSDNode;
friend class MaskedLoadSDNode;
uint16_t : NumLSBaseSDNodeBits;
uint16_t ExtTy : 2; // enum ISD::LoadExtType
uint16_t IsExpanding : 1;
};
class StoreSDNodeBitfields {
friend class StoreSDNode;
friend class MaskedStoreSDNode;
uint16_t : NumLSBaseSDNodeBits;
uint16_t IsTruncating : 1;
uint16_t IsCompressing : 1;
};
union {
char RawSDNodeBits[sizeof(uint16_t)];
SDNodeBitfields SDNodeBits;
ConstantSDNodeBitfields ConstantSDNodeBits;
MemSDNodeBitfields MemSDNodeBits;
LSBaseSDNodeBitfields LSBaseSDNodeBits;
LoadSDNodeBitfields LoadSDNodeBits;
StoreSDNodeBitfields StoreSDNodeBits;
};
// RawSDNodeBits must cover the entirety of the union. This means that all of
// the union's members must have size <= RawSDNodeBits. We write the RHS as
// "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(LoadSDNodeBitfields) <= 4, "field too wide");
static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
private:
friend class SelectionDAG;
// TODO: unfriend HandleSDNode once we fix its operand handling.
friend class HandleSDNode;
/// Unique id per SDNode in the DAG.
int NodeId = -1;
/// The values that are used by this operation.
SDUse *OperandList = nullptr;
/// The types of the values this node defines. SDNode's may
/// define multiple values simultaneously.
const EVT *ValueList;
/// List of uses for this SDNode.
SDUse *UseList = nullptr;
/// The number of entries in the Operand/Value list.
unsigned short NumOperands = 0;
unsigned short NumValues;
// The ordering of the SDNodes. It roughly corresponds to the ordering of the
// original LLVM instructions.
// This is used for turning off scheduling, because we'll forgo
// the normal scheduling algorithms and output the instructions according to
// this ordering.
unsigned IROrder;
/// Source line information.
DebugLoc debugLoc;
/// Return a pointer to the specified value type.
static const EVT *getValueTypeList(EVT VT);
SDNodeFlags Flags;
public:
/// Unique and persistent id per SDNode in the DAG.
/// Used for debug printing.
uint16_t PersistentId;
//===--------------------------------------------------------------------===//
// Accessors
//
/// Return the SelectionDAG opcode value for this node. For
/// pre-isel nodes (those for which isMachineOpcode returns false), these
/// are the opcode values in the ISD and <target>ISD namespaces. For
/// post-isel opcodes, see getMachineOpcode.
unsigned getOpcode() const { return (unsigned short)NodeType; }
/// Test if this node has a target-specific opcode (in the
/// \<target\>ISD namespace).
bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
/// Test if this node has a target-specific
/// memory-referencing opcode (in the \<target\>ISD namespace and
/// greater than FIRST_TARGET_MEMORY_OPCODE).
bool isTargetMemoryOpcode() const {
return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
}
/// Return true if the type of the node is undefined.
bool isUndef() const { return NodeType == ISD::UNDEF; }
/// Test if this node is a memory intrinsic (with valid pointer information).
/// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
/// non-memory intrinsics (with chains) that are not really instances of
/// MemSDNode. For such nodes, we need some extra state to determine the
/// proper classof relationship.
bool isMemIntrinsic() const {
return (NodeType == ISD::INTRINSIC_W_CHAIN ||
NodeType == ISD::INTRINSIC_VOID) &&
SDNodeBits.IsMemIntrinsic;
}
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FEXP:
case ISD::STRICT_FEXP2:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
return true;
}
}
/// Test if this node has a post-isel opcode, directly
/// corresponding to a MachineInstr opcode.
bool isMachineOpcode() const { return NodeType < 0; }
/// This may only be called if isMachineOpcode returns
/// true. It returns the MachineInstr opcode value that the node's opcode
/// corresponds to.
unsigned getMachineOpcode() const {
assert(isMachineOpcode() && "Not a MachineInstr opcode!");
return ~NodeType;
}
bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
/// Return true if there are no uses of this node.
bool use_empty() const { return UseList == nullptr; }
/// Return true if there is exactly one use of this node.
bool hasOneUse() const {
return !use_empty() && std::next(use_begin()) == use_end();
}
/// Return the number of uses of this node. This method takes
/// time proportional to the number of uses.
size_t use_size() const { return std::distance(use_begin(), use_end()); }
/// Return the unique node id.
int getNodeId() const { return NodeId; }
/// Set unique node id.
void setNodeId(int Id) { NodeId = Id; }
/// Return the node ordering.
unsigned getIROrder() const { return IROrder; }
/// Set the node ordering.
void setIROrder(unsigned Order) { IROrder = Order; }
/// Return the source location info.
const DebugLoc &getDebugLoc() const { return debugLoc; }
/// Set source location info. Try to avoid this, putting
/// it in the constructor is preferable.
void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
/// This class provides iterator support for SDUse
/// operands that use a specific SDNode.
class use_iterator
: public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
friend class SDNode;
SDUse *Op = nullptr;
explicit use_iterator(SDUse *op) : Op(op) {}
public:
using reference = std::iterator<std::forward_iterator_tag,
SDUse, ptrdiff_t>::reference;
using pointer = std::iterator<std::forward_iterator_tag,
SDUse, ptrdiff_t>::pointer;
use_iterator() = default;
use_iterator(const use_iterator &I) : Op(I.Op) {}
bool operator==(const use_iterator &x) const {
return Op == x.Op;
}
bool operator!=(const use_iterator &x) const {
return !operator==(x);
}
/// Return true if this iterator is at the end of uses list.
bool atEnd() const { return Op == nullptr; }
// Iterator traversal: forward iteration only.
use_iterator &operator++() { // Preincrement
assert(Op && "Cannot increment end iterator!");
Op = Op->getNext();
return *this;
}
use_iterator operator++(int) { // Postincrement
use_iterator tmp = *this; ++*this; return tmp;
}
/// Retrieve a pointer to the current user node.
SDNode *operator*() const {
assert(Op && "Cannot dereference end iterator!");
return Op->getUser();
}
SDNode *operator->() const { return operator*(); }
SDUse &getUse() const { return *Op; }
/// Retrieve the operand # of this use in its user.
unsigned getOperandNo() const {
assert(Op && "Cannot dereference end iterator!");
return (unsigned)(Op - Op->getUser()->OperandList);
}
};
/// Provide iteration support to walk over all uses of an SDNode.
use_iterator use_begin() const {
return use_iterator(UseList);
}
static use_iterator use_end() { return use_iterator(nullptr); }
inline iterator_range<use_iterator> uses() {
return make_range(use_begin(), use_end());
}
inline iterator_range<use_iterator> uses() const {
return make_range(use_begin(), use_end());
}
/// Return true if there are exactly NUSES uses of the indicated value.
/// This method ignores uses of other values defined by this operation.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
/// Return true if there is any use of the indicated value.
/// This method ignores uses of other values defined by this operation.
bool hasAnyUseOfValue(unsigned Value) const;
/// Return true if this node is the only use of N.
bool isOnlyUserOf(const SDNode *N) const;
/// Return true if this node is an operand of N.
bool isOperandOf(const SDNode *N) const;
/// Return true if this node is a predecessor of N.
/// NOTE: Implemented on top of hasPredecessor and every bit as
/// expensive. Use carefully.
bool isPredecessorOf(const SDNode *N) const {
return N->hasPredecessor(this);
}
/// Return true if N is a predecessor of this node.
/// N is either an operand of this node, or can be reached by recursively
/// traversing up the operands.
/// NOTE: This is an expensive method. Use it carefully.
bool hasPredecessor(const SDNode *N) const;
/// Returns true if N is a predecessor of any node in Worklist. This
/// helper keeps Visited and Worklist sets external so that unioned
/// searches can be performed in parallel, results can be cached across
/// queries, and Worklist can be grown incrementally. Stops early if N
/// is found but can be resumed later. Remember to clear Visited and
/// Worklist if the DAG changes.
static bool hasPredecessorHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode *> &Visited,
SmallVectorImpl<const SDNode *> &Worklist) {
if (Visited.count(N))
return true;
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
bool Found = false;
for (const SDValue &OpV : M->op_values()) {
SDNode *Op = OpV.getNode();
if (Visited.insert(Op).second)
Worklist.push_back(Op);
if (Op == N)
Found = true;
}
if (Found)
return true;
}
return false;
}
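
A self-contained model of the helper above, with a toy graph standing in for SDNode operands; it shows why Visited and Worklist live outside the call: a second query can return immediately from cached traversal work.

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Ops; };

// Mirrors hasPredecessorHelper: expand nodes from Worklist, recording
// everything seen in Visited; finish expanding the current node even
// after finding N so a later, resumed query stays correct.
bool hasPredecessor(const Node *N,
                    std::unordered_set<const Node *> &Visited,
                    std::vector<const Node *> &Worklist) {
  if (Visited.count(N))            // cached hit from an earlier query
    return true;
  while (!Worklist.empty()) {
    const Node *M = Worklist.back();
    Worklist.pop_back();
    bool Found = false;
    for (const Node *Op : M->Ops) {
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
      if (Op == N)
        Found = true;
    }
    if (Found)
      return true;
  }
  return false;
}

int main() {
  Node A, B, C;                    // operand chain: C -> B -> A
  B.Ops = {&A};
  C.Ops = {&B};
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{&C};
  bool First = hasPredecessor(&A, Visited, Worklist);  // walks the graph
  Worklist = {&C};
  bool Again = hasPredecessor(&A, Visited, Worklist);  // answered from cache
  return First && Again ? 0 : 1;
}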
/// Return true if all the users of N are contained in Nodes.
/// NOTE: Requires at least one match, but doesn't require them all.
static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
/// Return the number of values used by this operation.
unsigned getNumOperands() const { return NumOperands; }
/// Helper method returns the integer value of a ConstantSDNode operand.
inline uint64_t getConstantOperandVal(unsigned Num) const;
const SDValue &getOperand(unsigned Num) const {
assert(Num < NumOperands && "Invalid child # of SDNode!");
return OperandList[Num];
}
using op_iterator = SDUse *;
op_iterator op_begin() const { return OperandList; }
op_iterator op_end() const { return OperandList+NumOperands; }
ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
/// Iterator for directly iterating over the operand SDValue's.
struct value_op_iterator
: iterator_adaptor_base<value_op_iterator, op_iterator,
std::random_access_iterator_tag, SDValue,
ptrdiff_t, value_op_iterator *,
value_op_iterator *> {
explicit value_op_iterator(SDUse *U = nullptr)
: iterator_adaptor_base(U) {}
const SDValue &operator*() const { return I->get(); }
};
iterator_range<value_op_iterator> op_values() const {
return make_range(value_op_iterator(op_begin()),
value_op_iterator(op_end()));
}
SDVTList getVTList() const {
SDVTList X = { ValueList, NumValues };
return X;
}
/// If this node has a glue operand, return the node
/// to which the glue operand points. Otherwise return NULL.
SDNode *getGluedNode() const {
if (getNumOperands() != 0 &&
getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
return getOperand(getNumOperands()-1).getNode();
return nullptr;
}
/// If this node has a glue value with a user, return
/// the user (there is at most one). Otherwise return NULL.
SDNode *getGluedUser() const {
for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
if (UI.getUse().get().getValueType() == MVT::Glue)
return *UI;
return nullptr;
}
const SDNodeFlags getFlags() const { return Flags; }
void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
/// Clear any flags in this node that aren't also set in Flags.
/// If Flags is not in a defined state then this has no effect.
void intersectFlagsWith(const SDNodeFlags Flags);
/// Return the number of values defined/returned by this operator.
unsigned getNumValues() const { return NumValues; }
/// Return the type of a specified result.
EVT getValueType(unsigned ResNo) const {
assert(ResNo < NumValues && "Illegal result number!");
return ValueList[ResNo];
}
/// Return the type of a specified result as a simple type.
MVT getSimpleValueType(unsigned ResNo) const {
return getValueType(ResNo).getSimpleVT();
}
/// Returns MVT::getSizeInBits(getValueType(ResNo)).
unsigned getValueSizeInBits(unsigned ResNo) const {
return getValueType(ResNo).getSizeInBits();
}
using value_iterator = const EVT *;
value_iterator value_begin() const { return ValueList; }
value_iterator value_end() const { return ValueList+NumValues; }
/// Return the opcode of this operation for printing.
std::string getOperationName(const SelectionDAG *G = nullptr) const;
static const char* getIndexedModeName(ISD::MemIndexedMode AM);
void print_types(raw_ostream &OS, const SelectionDAG *G) const;
void print_details(raw_ostream &OS, const SelectionDAG *G) const;
void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
/// Print a SelectionDAG node and all children down to
/// the leaves. The given SelectionDAG allows target-specific nodes
/// to be printed in human-readable form. Unlike printr, this will
/// print the whole DAG, including children that appear multiple
/// times.
///
void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
/// Print a SelectionDAG node and children up to
/// depth "depth." The given SelectionDAG allows target-specific
/// nodes to be printed in human-readable form. Unlike printr, this
/// will print children that appear multiple times wherever they are
/// used.
///
void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
unsigned depth = 100) const;
/// Dump this node, for debugging.
void dump() const;
/// Dump (recursively) this node and its use-def subgraph.
void dumpr() const;
/// Dump this node, for debugging.
/// The given SelectionDAG allows target-specific nodes to be printed
/// in human-readable form.
void dump(const SelectionDAG *G) const;
/// Dump (recursively) this node and its use-def subgraph.
/// The given SelectionDAG allows target-specific nodes to be printed
/// in human-readable form.
void dumpr(const SelectionDAG *G) const;
/// printrFull to dbgs(). The given SelectionDAG allows
/// target-specific nodes to be printed in human-readable form.
/// Unlike dumpr, this will print the whole DAG, including children
/// that appear multiple times.
void dumprFull(const SelectionDAG *G = nullptr) const;
/// printrWithDepth to dbgs(). The given
/// SelectionDAG allows target-specific nodes to be printed in
/// human-readable form. Unlike dumpr, this will print children
/// that appear multiple times wherever they are used.
///
void dumprWithDepth(const SelectionDAG *G = nullptr,
unsigned depth = 100) const;
/// Gather unique data for the node.
void Profile(FoldingSetNodeID &ID) const;
/// This method should only be used by the SDUse class.
void addUse(SDUse &U) { U.addToList(&UseList); }
protected:
static SDVTList getSDVTList(EVT VT) {
SDVTList Ret = { getValueTypeList(VT), 1 };
return Ret;
}
/// Create an SDNode.
///
/// SDNodes are created without any operands, and never own the operand
/// storage. To add operands, see SelectionDAG::createOperands.
SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
: NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
IROrder(Order), debugLoc(std::move(dl)) {
memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
assert(NumValues == VTs.NumVTs &&
"NumValues wasn't wide enough for its operands!");
}
/// Release the operands and set this node to have zero operands.
void DropOperands();
};
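
The SubclassData machinery near the top of this class packs per-subclass flags into one uint16_t: each view starts with an unnamed bitfield that skips the bits its bases own, and all views overlay the same storage through a union. A stripped-down model, leaning on the same layout assumptions the code above makes:

#include <cstdint>
#include <cstring>

class BaseBits {
public:
  uint16_t HasDebugValue : 1;
  uint16_t IsMemIntrinsic : 1;
};
enum { NumBaseBits = 2 };

class MemBits {
public:
  uint16_t : NumBaseBits;          // unnamed field skips the base's bits
  uint16_t IsVolatile : 1;
};

union Packed {                     // every view overlays one uint16_t
  char Raw[sizeof(uint16_t)];
  BaseBits Base;
  MemBits Mem;
};
static_assert(sizeof(Packed) == sizeof(uint16_t), "views must stay packed");

int main() {
  Packed P;
  std::memset(P.Raw, 0, sizeof P.Raw);  // zero everything, as SDNode does
  P.Base.HasDebugValue = 1;             // bit 0
  P.Mem.IsVolatile = 1;                 // bit 2: no overlap with base bits
  return 0;
}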
/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
/// into SDNode creation functions.
/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
/// from the original Instruction, and IROrder is the ordinal position of
/// the instruction.
/// When an SDNode is created after the DAG is being built, both DebugLoc and
/// the IROrder are propagated from the original SDNode.
/// So SDLoc class provides two constructors besides the default one, one to
/// be used by the DAGBuilder, the other to be used by others.
class SDLoc {
private:
DebugLoc DL;
int IROrder = 0;
public:
SDLoc() = default;
SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
SDLoc(const Instruction *I, int Order) : IROrder(Order) {
assert(Order >= 0 && "bad IROrder");
if (I)
DL = I->getDebugLoc();
}
unsigned getIROrder() const { return IROrder; }
const DebugLoc &getDebugLoc() const { return DL; }
};
// Define inline functions from the SDValue class.
inline SDValue::SDValue(SDNode *node, unsigned resno)
: Node(node), ResNo(resno) {
// Explicitly check for !ResNo to avoid use-after-free, because there are
// callers that use SDValue(N, 0) with a deleted N to indicate successful
// combines.
assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
"Invalid result number for the given node!");
assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
}
inline unsigned SDValue::getOpcode() const {
return Node->getOpcode();
}
inline EVT SDValue::getValueType() const {
return Node->getValueType(ResNo);
}
inline unsigned SDValue::getNumOperands() const {
return Node->getNumOperands();
}
inline const SDValue &SDValue::getOperand(unsigned i) const {
return Node->getOperand(i);
}
inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
return Node->getConstantOperandVal(i);
}
inline bool SDValue::isTargetOpcode() const {
return Node->isTargetOpcode();
}
inline bool SDValue::isTargetMemoryOpcode() const {
return Node->isTargetMemoryOpcode();
}
inline bool SDValue::isMachineOpcode() const {
return Node->isMachineOpcode();
}
inline unsigned SDValue::getMachineOpcode() const {
return Node->getMachineOpcode();
}
inline bool SDValue::isUndef() const {
return Node->isUndef();
}
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
inline bool SDValue::hasOneUse() const {
return Node->hasNUsesOfValue(1, ResNo);
}
inline const DebugLoc &SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
inline void SDValue::dump() const {
return Node->dump();
}
inline void SDValue::dumpr() const {
return Node->dumpr();
}
// Define inline functions from the SDUse class.
inline void SDUse::set(const SDValue &V) {
if (Val.getNode()) removeFromList();
Val = V;
if (V.getNode()) V.getNode()->addUse(*this);
}
inline void SDUse::setInitial(const SDValue &V) {
Val = V;
V.getNode()->addUse(*this);
}
inline void SDUse::setNode(SDNode *N) {
if (Val.getNode()) removeFromList();
Val.setNode(N);
if (N) N->addUse(*this);
}
/// This class is used to form a handle around another node that
/// is persistent and is updated across invocations of replaceAllUsesWith on its
/// operand. This node should be directly created by end-users and not added to
/// the AllNodes list.
class HandleSDNode : public SDNode {
SDUse Op;
public:
explicit HandleSDNode(SDValue X)
: SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
// HandleSDNodes are never inserted into the DAG, so they won't be
// auto-numbered. Use ID 65535 as a sentinel.
PersistentId = 0xffff;
// Manually set up the operand list. This node type is special in that it's
// always stack allocated and SelectionDAG does not manage its operands.
// TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
// be so special.
Op.setUser(this);
Op.setInitial(X);
NumOperands = 1;
OperandList = &Op;
}
~HandleSDNode();
const SDValue &getValue() const { return Op; }
};
class AddrSpaceCastSDNode : public SDNode {
private:
unsigned SrcAddrSpace;
unsigned DestAddrSpace;
public:
AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
unsigned SrcAS, unsigned DestAS);
unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
unsigned getDestAddressSpace() const { return DestAddrSpace; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ADDRSPACECAST;
}
};
/// This is an abstract virtual class for memory operations.
class MemSDNode : public SDNode {
private:
// VT of in-memory value.
EVT MemoryVT;
protected:
/// Memory reference information.
MachineMemOperand *MMO;
public:
MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemoryVT, MachineMemOperand *MMO);
bool readMem() const { return MMO->isLoad(); }
bool writeMem() const { return MMO->isStore(); }
/// Returns alignment and volatility of the memory access
unsigned getOriginalAlignment() const {
return MMO->getBaseAlignment();
}
unsigned getAlignment() const {
return MMO->getAlignment();
}
/// Return the SubclassData value, without HasDebugValue. This contains an
/// encoding of the volatile flag, as well as bits used by subclasses. This
/// function should only be used to compute a FoldingSetNodeID value.
/// The HasDebugValue bit is masked out because CSE map needs to match
/// nodes with debug info with nodes without debug info.
unsigned getRawSubclassData() const {
uint16_t Data;
union {
char RawSDNodeBits[sizeof(uint16_t)];
SDNodeBitfields SDNodeBits;
};
memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
SDNodeBits.HasDebugValue = 0;
memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
return Data;
}
bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
// Returns the offset from the location of the access.
int64_t getSrcValueOffset() const { return MMO->getOffset(); }
/// Returns the AA info that describes the dereference.
AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
/// Returns the Ranges that describes the dereference.
const MDNode *getRanges() const { return MMO->getRanges(); }
/// Returns the synchronization scope ID for this memory operation.
SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
/// Return the atomic ordering requirements for this memory operation. For
/// cmpxchg atomic operations, return the atomic ordering requirements when
/// store occurs.
AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
/// Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
/// Return a MachineMemOperand object describing the memory
/// reference performed by operation.
MachineMemOperand *getMemOperand() const { return MMO; }
const MachinePointerInfo &getPointerInfo() const {
return MMO->getPointerInfo();
}
/// Return the address space for the associated pointer
unsigned getAddressSpace() const {
return getPointerInfo().getAddrSpace();
}
/// Update this MemSDNode's MachineMemOperand information
/// to reflect the alignment of NewMMO, if it has a greater alignment.
/// This must only be used when the new alignment applies to all users of
/// this MachineMemOperand.
void refineAlignment(const MachineMemOperand *NewMMO) {
MMO->refineAlignment(NewMMO);
}
const SDValue &getChain() const { return getOperand(0); }
const SDValue &getBasePtr() const {
return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
// For some targets, we lower some target intrinsics to a MemIntrinsicNode
// with either an intrinsic or a target opcode.
return N->getOpcode() == ISD::LOAD ||
N->getOpcode() == ISD::STORE ||
N->getOpcode() == ISD::PREFETCH ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
N->getOpcode() == ISD::ATOMIC_SWAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE ||
N->getOpcode() == ISD::MLOAD ||
N->getOpcode() == ISD::MSTORE ||
N->getOpcode() == ISD::MGATHER ||
N->getOpcode() == ISD::MSCATTER ||
N->isMemIntrinsic() ||
N->isTargetMemoryOpcode();
}
};
/// This is an SDNode representing atomic operations.
class AtomicSDNode : public MemSDNode {
public:
AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
EVT MemVT, MachineMemOperand *MMO)
: MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {}
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getVal() const { return getOperand(2); }
/// Returns true if this SDNode represents cmpxchg atomic operation, false
/// otherwise.
bool isCompareAndSwap() const {
unsigned Op = getOpcode();
return Op == ISD::ATOMIC_CMP_SWAP ||
Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
}
/// For cmpxchg atomic operations, return the atomic ordering requirements
/// when store does not occur.
AtomicOrdering getFailureOrdering() const {
assert(isCompareAndSwap() && "Must be cmpxchg operation");
return MMO->getFailureOrdering();
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
N->getOpcode() == ISD::ATOMIC_SWAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
};
/// This SDNode is used for target intrinsics that touch
/// memory and need an associated MachineMemOperand. Its opcode may be
/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
class MemIntrinsicSDNode : public MemSDNode {
public:
MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
: MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
SDNodeBits.IsMemIntrinsic = true;
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
// We lower some target intrinsics to their target opcode early, so a
// node with a target opcode can be of this class
return N->isMemIntrinsic() ||
N->getOpcode() == ISD::PREFETCH ||
N->isTargetMemoryOpcode();
}
};
/// This SDNode is used to implement the code generator
/// support for the llvm IR shufflevector instruction. It combines elements
/// from two input vectors into a new input vector, with the selection and
/// ordering of elements determined by an array of integers, referred to as
/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
/// An index of -1 is treated as undef, such that the code generator may put
/// any value in the corresponding element of the result.
class ShuffleVectorSDNode : public SDNode {
// The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
// is freed when the SelectionDAG object is destroyed.
const int *Mask;
protected:
friend class SelectionDAG;
ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
: SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
public:
ArrayRef<int> getMask() const {
EVT VT = getValueType(0);
return makeArrayRef(Mask, VT.getVectorNumElements());
}
int getMaskElt(unsigned Idx) const {
assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
return Mask[Idx];
}
bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
int getSplatIndex() const {
assert(isSplat() && "Cannot get splat index for non-splat!");
EVT VT = getValueType(0);
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (Mask[i] >= 0)
return Mask[i];
}
llvm_unreachable("Splat with all undef indices?");
}
static bool isSplatMask(const int *Mask, EVT VT);
/// Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void commuteMask(MutableArrayRef<int> Mask) {
unsigned NumElems = Mask.size();
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
continue;
else if (idx < (int)NumElems)
Mask[i] = idx + NumElems;
else
Mask[i] = idx - NumElems;
}
}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VECTOR_SHUFFLE;
}
};
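
commuteMask above is small but easy to get backwards: after the two inputs swap, a lane that selected from the first vector must now index into the second half, and vice versa. A runnable model:

#include <cassert>
#include <vector>

void commuteMask(std::vector<int> &Mask) {
  int NumElems = static_cast<int>(Mask.size());
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                    // -1 (undef) stays undef
    Idx = Idx < NumElems ? Idx + NumElems : Idx - NumElems;
  }
}

int main() {
  std::vector<int> Mask = {0, 5, -1, 3}; // 4-wide shuffle of vectors A, B
  commuteMask(Mask);                     // the same shuffle of B, A instead
  assert((Mask == std::vector<int>{4, 1, -1, 7}));
  return 0;
}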
class ConstantSDNode : public SDNode {
friend class SelectionDAG;
const ConstantInt *Value;
ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val,
const DebugLoc &DL, EVT VT)
: SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DL,
getSDVTList(VT)),
Value(val) {
ConstantSDNodeBits.IsOpaque = isOpaque;
}
public:
const ConstantInt *getConstantIntValue() const { return Value; }
const APInt &getAPIntValue() const { return Value->getValue(); }
uint64_t getZExtValue() const { return Value->getZExtValue(); }
int64_t getSExtValue() const { return Value->getSExtValue(); }
bool isOne() const { return Value->isOne(); }
bool isNullValue() const { return Value->isZero(); }
bool isAllOnesValue() const { return Value->isMinusOne(); }
bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Constant ||
N->getOpcode() == ISD::TargetConstant;
}
};
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
}
class ConstantFPSDNode : public SDNode {
friend class SelectionDAG;
const ConstantFP *Value;
ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL,
EVT VT)
: SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, DL,
getSDVTList(VT)),
Value(val) {}
public:
const APFloat& getValueAPF() const { return Value->getValueAPF(); }
const ConstantFP *getConstantFPValue() const { return Value; }
/// Return true if the value is positive or negative zero.
bool isZero() const { return Value->isZero(); }
/// Return true if the value is a NaN.
bool isNaN() const { return Value->isNaN(); }
/// Return true if the value is an infinity.
bool isInfinity() const { return Value->isInfinity(); }
/// Return true if the value is negative.
bool isNegative() const { return Value->isNegative(); }
/// We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
/// We leave the version with the double argument here because it's just so
/// convenient to write "2.0" and the like. Without this function we'd
/// have to duplicate its logic everywhere it's called.
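/// A hypothetical use (an illustrative sketch only; Op is an assumed
/// SDValue, not a name from this header):
/// \code
///   if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op.getNode()))
///     if (CFP->isExactlyValue(2.0))
///       ; // Op is bit-for-bit equal to 2.0 in its FP semantics.
/// \endcode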
bool isExactlyValue(double V) const {
bool ignored;
APFloat Tmp(V);
Tmp.convert(Value->getValueAPF().getSemantics(),
APFloat::rmNearestTiesToEven, &ignored);
return isExactlyValue(Tmp);
}
bool isExactlyValue(const APFloat& V) const;
static bool isValueValidForType(EVT VT, const APFloat& Val);
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantFP ||
N->getOpcode() == ISD::TargetConstantFP;
}
};
/// Returns true if \p V is a constant integer zero.
bool isNullConstant(SDValue V);
/// Returns true if \p V is an FP constant with a value of positive zero.
bool isNullFPConstant(SDValue V);
/// Returns true if \p V is an integer constant with all bits set.
bool isAllOnesConstant(SDValue V);
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
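/// For example (illustrative), (xor X, -1), with the all-ones constant as
/// operand 1, is a bitwise not of X.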
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
ConstantSDNode *isConstOrConstSplat(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue V);
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
const GlobalValue *TheGlobal;
int64_t Offset;
unsigned char TargetFlags;
GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
const GlobalValue *GA, EVT VT, int64_t o,
unsigned char TargetFlags);
public:
const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
// Return the address space this GlobalAddress belongs to.
unsigned getAddressSpace() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::GlobalAddress ||
N->getOpcode() == ISD::TargetGlobalAddress ||
N->getOpcode() == ISD::GlobalTLSAddress ||
N->getOpcode() == ISD::TargetGlobalTLSAddress;
}
};
class FrameIndexSDNode : public SDNode {
friend class SelectionDAG;
int FI;
FrameIndexSDNode(int fi, EVT VT, bool isTarg)
: SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
0, DebugLoc(), getSDVTList(VT)), FI(fi) {
}
public:
int getIndex() const { return FI; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::FrameIndex ||
N->getOpcode() == ISD::TargetFrameIndex;
}
};
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
int JTI;
unsigned char TargetFlags;
JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
public:
int getIndex() const { return JTI; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::JumpTable ||
N->getOpcode() == ISD::TargetJumpTable;
}
};
class ConstantPoolSDNode : public SDNode {
friend class SelectionDAG;
union {
const Constant *ConstVal;
MachineConstantPoolValue *MachineCPVal;
} Val;
int Offset; // It's a MachineConstantPoolValue if top bit is set.
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
unsigned char TargetFlags;
ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.ConstVal = c;
}
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
EVT VT, int o, unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.MachineCPVal = v;
Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
}
public:
bool isMachineConstantPoolEntry() const {
return Offset < 0;
}
const Constant *getConstVal() const {
assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
return Val.ConstVal;
}
MachineConstantPoolValue *getMachineCPVal() const {
assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
return Val.MachineCPVal;
}
int getOffset() const {
return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
}
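// Encoding sketch (illustrative, not part of the original header): with a
// 32-bit unsigned, the tag bit is 1 << 31. The MachineConstantPoolValue
// constructor sets it, making Offset negative when read as an int (which is
// exactly what isMachineConstantPoolEntry() tests), and getOffset() masks
// the tag back off.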
// Return the alignment of this constant pool object, which is either 0 (for
// default alignment) or the desired value.
unsigned getAlignment() const { return Alignment; }
unsigned char getTargetFlags() const { return TargetFlags; }
Type *getType() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantPool ||
N->getOpcode() == ISD::TargetConstantPool;
}
};
/// Completely target-dependent object reference.
class TargetIndexSDNode : public SDNode {
friend class SelectionDAG;
unsigned char TargetFlags;
int Index;
int64_t Offset;
public:
TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
: SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
TargetFlags(TF), Index(Idx), Offset(Ofs) {}
unsigned char getTargetFlags() const { return TargetFlags; }
int getIndex() const { return Index; }
int64_t getOffset() const { return Offset; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::TargetIndex;
}
};
class BasicBlockSDNode : public SDNode {
friend class SelectionDAG;
MachineBasicBlock *MBB;
/// Debug info is meaningful and potentially useful here, but we create
/// blocks out of order when they're jumped to, which makes it a bit
/// harder. Let's see if we need it first.
explicit BasicBlockSDNode(MachineBasicBlock *mbb)
: SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
{}
public:
MachineBasicBlock *getBasicBlock() const { return MBB; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BasicBlock;
}
};
/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
class BuildVectorSDNode : public SDNode {
public:
// These are constructed as SDNodes and then cast to BuildVectorSDNodes.
explicit BuildVectorSDNode() = delete;
/// Check if this is a constant splat, and if so, find the
/// smallest element size that splats the vector. If MinSplatBits is
/// nonzero, the element size must be at least that large. Note that the
/// splat element may be the entire vector (i.e., a one element vector).
/// Returns the splat element value in SplatValue. Any undefined bits in
/// that value are zero, and the corresponding bits in the SplatUndef mask
/// are set. The SplatBitSize value is set to the splat element size in
/// bits. HasAnyUndefs is set to true if any bits in the vector are
/// undefined. isBigEndian describes the endianness of the target.
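/// For example (illustrative): a v4i32 BUILD_VECTOR of four i32 1 constants
/// is a splat with SplatValue 0x00000001 and SplatBitSize 32, while four
/// i32 0x01010101 constants splat down to SplatBitSize 8 (the byte 0x01).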
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize, bool &HasAnyUndefs,
unsigned MinSplatBits = 0,
bool isBigEndian = false) const;
/// \brief Returns the splatted value or a null value if this is not a splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
/// \brief Returns the splatted constant or null if this is not a constant
/// splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
ConstantSDNode *
getConstantSplatNode(BitVector *UndefElements = nullptr) const;
/// \brief Returns the splatted constant FP or null if this is not a constant
/// FP splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
ConstantFPSDNode *
getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
/// \brief If this is a constant FP splat and the splatted constant FP is an
/// exact power of 2, return the log base 2 integer value. Otherwise,
/// return -1.
///
/// The BitWidth specifies the necessary bit precision.
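/// For example (illustrative): a splat of 8.0 returns 3, while splats of
/// 0.5 or 3.0 return -1, since neither is an exact integral power of 2.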
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const;
bool isConstant() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
};
/// An SDNode that holds an arbitrary LLVM IR Value. This is
/// used when the SelectionDAG needs to make a simple reference to something
/// in the LLVM IR representation.
///
class SrcValueSDNode : public SDNode {
friend class SelectionDAG;
const Value *V;
/// Create a SrcValue for a general value.
explicit SrcValueSDNode(const Value *v)
: SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
public:
/// Return the contained Value.
const Value *getValue() const { return V; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::SRCVALUE;
}
};
class MDNodeSDNode : public SDNode {
friend class SelectionDAG;
const MDNode *MD;
explicit MDNodeSDNode(const MDNode *md)
: SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
{}
public:
const MDNode *getMD() const { return MD; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MDNODE_SDNODE;
}
};
class RegisterSDNode : public SDNode {
friend class SelectionDAG;
unsigned Reg;
RegisterSDNode(unsigned reg, EVT VT)
: SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
public:
unsigned getReg() const { return Reg; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Register;
}
};
class RegisterMaskSDNode : public SDNode {
friend class SelectionDAG;
// The memory for RegMask is not owned by the node.
const uint32_t *RegMask;
RegisterMaskSDNode(const uint32_t *mask)
: SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
RegMask(mask) {}
public:
const uint32_t *getRegMask() const { return RegMask; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::RegisterMask;
}
};
class BlockAddressSDNode : public SDNode {
friend class SelectionDAG;
const BlockAddress *BA;
int64_t Offset;
unsigned char TargetFlags;
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
int64_t o, unsigned char Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
BA(ba), Offset(o), TargetFlags(Flags) {}
public:
const BlockAddress *getBlockAddress() const { return BA; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BlockAddress ||
N->getOpcode() == ISD::TargetBlockAddress;
}
};
class EHLabelSDNode : public SDNode {
friend class SelectionDAG;
MCSymbol *Label;
EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L)
: SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {}
public:
MCSymbol *getLabel() const { return Label; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::EH_LABEL;
}
};
class ExternalSymbolSDNode : public SDNode {
friend class SelectionDAG;
const char *Symbol;
unsigned char TargetFlags;
ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
: SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {}
public:
const char *getSymbol() const { return Symbol; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ExternalSymbol ||
N->getOpcode() == ISD::TargetExternalSymbol;
}
};
class MCSymbolSDNode : public SDNode {
friend class SelectionDAG;
MCSymbol *Symbol;
MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
: SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
public:
MCSymbol *getMCSymbol() const { return Symbol; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MCSymbol;
}
};
class CondCodeSDNode : public SDNode {
friend class SelectionDAG;
ISD::CondCode Condition;
explicit CondCodeSDNode(ISD::CondCode Cond)
: SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
Condition(Cond) {}
public:
ISD::CondCode get() const { return Condition; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::CONDCODE;
}
};
/// This class is used to represent EVT's, which are used
/// to parameterize some operations.
class VTSDNode : public SDNode {
friend class SelectionDAG;
EVT ValueType;
explicit VTSDNode(EVT VT)
: SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
ValueType(VT) {}
public:
EVT getVT() const { return ValueType; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VALUETYPE;
}
};
/// Base class for LoadSDNode and StoreSDNode
class LSBaseSDNode : public MemSDNode {
public:
LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
}
const SDValue &getOffset() const {
return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
}
/// Return the addressing mode for this load or store:
/// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
ISD::MemIndexedMode getAddressingMode() const {
return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
}
/// Return true if this is a pre/post inc/dec load/store.
bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
/// Return true if this is NOT a pre/post inc/dec load/store.
bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD ||
N->getOpcode() == ISD::STORE;
}
};
/// This class is used to represent ISD::LOAD nodes.
class LoadSDNode : public LSBaseSDNode {
friend class SelectionDAG;
LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
assert(readMem() && "Load MachineMemOperand is not a load!");
assert(!writeMem() && "Load MachineMemOperand is a store!");
}
public:
/// Return whether this is a plain (non-extending) load,
/// or one of the varieties of value-extending loads.
ISD::LoadExtType getExtensionType() const {
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getOffset() const { return getOperand(2); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD;
}
};
/// This class is used to represent ISD::STORE nodes.
class StoreSDNode : public LSBaseSDNode {
friend class SelectionDAG;
StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
assert(!readMem() && "Store MachineMemOperand is a load!");
assert(writeMem() && "Store MachineMemOperand is not a store!");
}
public:
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
const SDValue &getValue() const { return getOperand(1); }
const SDValue &getBasePtr() const { return getOperand(2); }
const SDValue &getOffset() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::STORE;
}
};
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
friend class SelectionDAG;
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
// In both nodes the address is Op1 and the mask is Op2:
// MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
// MaskedStoreSDNode (Chain, ptr, mask, data)
// Mask is a vector of i1 elements
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getMask() const { return getOperand(2); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
N->getOpcode() == ISD::MSTORE;
}
};
/// This class is used to represent an MLOAD node
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
LoadSDNodeBits.IsExpanding = IsExpanding;
}
ISD::LoadExtType getExtensionType() const {
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
}
bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
bool isTrunc, bool isCompressing, EVT MemVT,
MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
StoreSDNodeBits.IsCompressing = isCompressing;
}
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
/// Returns true if the op does a compression to the vector before storing.
/// The node contiguously stores the active elements (integers or floats)
/// in src (those with their respective bit set in writemask k) to unaligned
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
const SDValue &getValue() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
}
};
/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
public:
friend class SelectionDAG;
MaskedGatherScatterSDNode(unsigned NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
// In both nodes the mask is Op2; the base and index are Op3 and Op4:
// MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value
// MaskedScatterSDNode (Chain, value, mask, base, index)
// Mask is a vector of i1 elements
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
const SDValue &getValue() const { return getOperand(1); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER ||
N->getOpcode() == ISD::MSCATTER;
}
};
/// This class is used to represent an MGATHER node
///
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
};
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSCATTER;
}
};
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
class MachineSDNode : public SDNode {
public:
using mmo_iterator = MachineMemOperand **;
private:
friend class SelectionDAG;
MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
: SDNode(Opc, Order, DL, VTs) {}
/// Memory reference descriptions for this instruction.
mmo_iterator MemRefs = nullptr;
mmo_iterator MemRefsEnd = nullptr;
public:
mmo_iterator memoperands_begin() const { return MemRefs; }
mmo_iterator memoperands_end() const { return MemRefsEnd; }
bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
/// Assign this MachineSDNode's memory reference descriptor
/// list. This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI)
assert(*MMI && "Null mem ref detected!");
MemRefs = NewMemRefs;
MemRefsEnd = NewMemRefsEnd;
}
static bool classof(const SDNode *N) {
return N->isMachineOpcode();
}
};
class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
SDNode, ptrdiff_t> {
const SDNode *Node;
unsigned Operand;
SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
public:
bool operator==(const SDNodeIterator& x) const {
return Operand == x.Operand;
}
bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
pointer operator*() const {
return Node->getOperand(Operand).getNode();
}
pointer operator->() const { return operator*(); }
SDNodeIterator& operator++() { // Preincrement
++Operand;
return *this;
}
SDNodeIterator operator++(int) { // Postincrement
SDNodeIterator tmp = *this; ++*this; return tmp;
}
size_t operator-(SDNodeIterator Other) const {
assert(Node == Other.Node &&
"Cannot compare iterators of two different nodes!");
return Operand - Other.Operand;
}
static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
static SDNodeIterator end (const SDNode *N) {
return SDNodeIterator(N, N->getNumOperands());
}
unsigned getOperand() const { return Operand; }
const SDNode *getNode() const { return Node; }
};
template <> struct GraphTraits<SDNode*> {
using NodeRef = SDNode *;
using ChildIteratorType = SDNodeIterator;
static NodeRef getEntryNode(SDNode *N) { return N; }
static ChildIteratorType child_begin(NodeRef N) {
return SDNodeIterator::begin(N);
}
static ChildIteratorType child_end(NodeRef N) {
return SDNodeIterator::end(N);
}
};
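// Usage sketch (illustrative; assumes llvm/ADT/GraphTraits.h and
// llvm/ADT/DepthFirstIterator.h are available): with this specialization,
// the generic graph utilities can walk a node's operand graph, e.g.:
//
//   for (SDNode *Op : children<SDNode *>(N))
//     ; // visit each operand node of N
//   for (SDNode *M : depth_first(N))
//     ; // visit N and everything reachable through its operands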
/// A representation of the largest SDNode, for use in sizeof().
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
BlockAddressSDNode,
GlobalAddressSDNode>;
/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
namespace ISD {
/// Returns true if the specified node is a non-extending and unindexed load.
inline bool isNormalLoad(const SDNode *N) {
const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
Ld->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-extending load.
inline bool isNON_EXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}
/// Returns true if the specified node is an EXTLOAD.
inline bool isEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
}
/// Returns true if the specified node is a SEXTLOAD.
inline bool isSEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}
/// Returns true if the specified node is a ZEXTLOAD.
inline bool isZEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}
/// Returns true if the specified node is an unindexed load.
inline bool isUNINDEXEDLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-truncating
/// and unindexed store.
inline bool isNormalStore(const SDNode *N) {
const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
return St && !St->isTruncatingStore() &&
St->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-truncating store.
inline bool isNON_TRUNCStore(const SDNode *N) {
return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
}
/// Returns true if the specified node is a truncating store.
inline bool isTRUNCStore(const SDNode *N) {
return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
}
/// Returns true if the specified node is an unindexed store.
inline bool isUNINDEXEDStore(const SDNode *N) {
return isa<StoreSDNode>(N) &&
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
} // end namespace ISD
} // end namespace llvm
#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (revision 322855)
@@ -1,872 +1,873 @@
//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the DAGTypeLegalizer class. This is a private interface
// shared among the files that implement the SelectionDAG::LegalizeTypes
// method.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and hacks on it until only
/// value types the target machine can handle are left. This involves promoting
/// small sizes to large sizes or splitting up large values into small values.
///
class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
const TargetLowering &TLI;
SelectionDAG &DAG;
public:
/// This pass uses the NodeId on the SDNodes to hold information about the
/// state of each node. This enum lists the possible values.
enum NodeIdFlags {
/// All operands have been processed, so this node is ready to be handled.
ReadyToProcess = 0,
/// This is a new node, not before seen, that was created in the process of
/// legalizing some other node.
NewNode = -1,
/// This node's ID needs to be set to the number of its unprocessed
/// operands.
Unanalyzed = -2,
/// This is a node that has already been processed.
Processed = -3
// 1+ - This is a node which has this many unprocessed operands.
};
private:
/// This is a bitvector that contains two bits for each simple value type,
/// where the two bits correspond to the LegalizeAction enum from
/// TargetLowering. This can be queried with "getTypeAction(VT)".
TargetLowering::ValueTypeActionImpl ValueTypeActions;
/// Return how we should legalize values of this type.
TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT);
}
/// Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
/// Return true if this is a simple legal type.
bool isSimpleLegalType(EVT VT) const {
return VT.isSimple() && TLI.isTypeLegal(VT);
}
/// Return true if this type can be passed in registers.
/// For example, x86_64's f128 should be legal in registers, with only some
/// operations converted to library calls or integer bitwise operations.
bool isLegalInHWReg(EVT VT) const {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
return VT == NVT && isSimpleLegalType(VT);
}
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
/// Pretend all of this node's results are legal.
bool IgnoreNodeResults(SDNode *N) const {
return N->getOpcode() == ISD::TargetConstant;
}
/// For integer nodes that are below legal width, this map indicates what
/// promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
/// For integer nodes that need to be expanded this map indicates which
/// operands are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
/// For floating-point nodes converted to integers of the same size, this map
/// indicates the converted value to use.
SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
/// For floating-point nodes that have a smaller precision than the smallest
/// supported precision, this map indicates what promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
/// For nodes that are <1 x ty>, this map indicates the scalar value of type
/// 'ty' to use.
SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
/// For nodes that need to be split this map indicates which operands are the
/// expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
/// For vector nodes that need to be widened, indicates the widened value to
/// use.
SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
/// For values that have been replaced with another, indicates the replacement
/// value to use.
SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
/// This defines a worklist of nodes to process. In order to be pushed onto
/// this worklist, all operands of a node must have already been processed.
SmallVector<SDNode*, 128> Worklist;
public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
}
/// This is the main entry point for the type legalizer. This does a
/// top-down traversal of the dag, legalizing types as it goes. Returns
/// "true" if it made any changes.
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
ExpungeNode(Old);
ExpungeNode(New);
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
}
SelectionDAG &getDAG() const { return DAG; }
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
void ExpungeNode(SDNode *N);
void PerformExpensiveChecks();
void RemapValue(SDValue &N);
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
SDValue BitConvertVectorToIntegerVector(SDValue Op);
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);
/// Replace each result of the given MERGE_VALUES node with the corresponding
/// input operand, except for the result 'ResNo', for which the corresponding
/// input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
/// Modify a bit vector to match the SetCC result type of ValVT.
/// The bit vector is widened with zeroes when WithZeroes is true.
SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
void AddToWorklist(SDNode *N) {
N->setNodeId(ReadyToProcess);
Worklist.push_back(N);
}
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was promoted to a larger integer type,
/// this returns the promoted value. The low bits of the promoted value
/// corresponding to the original type are exactly equal to Op.
/// The extra bits contain rubbish, so the promoted value may need to be zero-
/// or sign-extended from the original type before it is usable (the helpers
/// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
/// For example, if Op is an i16 and was promoted to an i32, then this method
/// returns an i32, the lower 16 bits of which coincide with Op, and the upper
/// 16 bits of which contain rubbish.
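/// Illustrative sketch: an i16 holding 0x8000 promoted to i32 may come back
/// as 0x????8000; SExtPromotedInteger then yields 0xFFFF8000, and
/// ZExtPromotedInteger yields 0x00008000.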
SDValue GetPromotedInteger(SDValue Op) {
SDValue &PromotedOp = PromotedIntegers[Op];
RemapValue(PromotedOp);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
void SetPromotedInteger(SDValue Op, SDValue Result);
/// Get a promoted operand and sign extend it to the final size.
SDValue SExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
DAG.getValueType(OldVT));
}
/// Get a promoted operand and zero extend it to the final size.
SDValue ZExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_AssertSext(SDNode *N);
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CTLZ(SDNode *N);
SDValue PromoteIntRes_CTPOP(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SELECT(SDNode *N);
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
//===--------------------------------------------------------------------===//
// Integer Expansion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was expanded into two integers of half
/// the size, this returns the two halves. The low bits of Op are exactly
/// equal to the bits of Lo; the high bits exactly equal Hi.
/// For example, if Op is an i64 which was expanded into two i32's, then this
/// method returns the two i32's, with Lo being equal to the lower 32 bits of
/// Op, and Hi being equal to the upper 32 bits.
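/// Illustrative sketch: expanding an i64 holding 0x0123456789ABCDEF yields
/// Lo == 0x89ABCDEF and Hi == 0x01234567 as two i32 values.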
void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
/// Given an operand Op of float type, this returns the integer it was
/// converted to when Op is not supported in target HW.
/// The integer contains exactly the same bits as Op - only the type changed.
/// For example, if Op is an f32 which was softened to an i32, then this
/// method returns an i32 whose bits coincide with those of Op.
/// If Op can be efficiently supported in target HW, or the operand must
/// stay in a register, Op is not converted to an integer;
/// in that case, the given Op is returned unchanged.
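/// Illustrative sketch: an f32 holding 1.0f softens to an i32 holding
/// 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0.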
SDValue GetSoftenedFloat(SDValue Op) {
SDValue &SoftenedOp = SoftenedFloats[Op];
if (!SoftenedOp.getNode() &&
isSimpleLegalType(Op.getValueType()))
return Op;
RemapValue(SoftenedOp);
assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
// Convert Float Results to Integer for Non-HW-supported Operations.
bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
SDValue SoftenFloatRes_FEXP2(SDNode *N);
SDValue SoftenFloatRes_FFLOOR(SDNode *N);
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
SDValue SoftenFloatRes_FLOG10(SDNode *N);
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
SDValue SoftenFloatRes_FROUND(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
// either it was softened before by SoftenFloatResult and references to the
// operand were replaced by ReplaceValueWith, or its value type is legal in
// HW registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
// Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FABS(SDNode *N);
SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was expanded into two floating-point
/// values of half the size, this returns the two halves.
/// The low bits of Op are exactly equal to the bits of Lo; the high bits
/// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
/// into two f64's, then this method returns the two f64's, with Lo being
/// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
// Float Result Expansion.
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
SDValue ExpandFloatOp_SETCC(SDNode *N);
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float promotion support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
SDValue GetPromotedFloat(SDValue Op) {
SDValue &PromotedOp = PromotedFloats[Op];
RemapValue(PromotedOp);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
void SetPromotedFloat(SDValue Op, SDValue Result);
void PromoteFloatResult(SDNode *N, unsigned ResNo);
SDValue PromoteFloatRes_BITCAST(SDNode *N);
SDValue PromoteFloatRes_BinOp(SDNode *N);
SDValue PromoteFloatRes_ConstantFP(SDNode *N);
SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
SDValue PromoteFloatRes_FMAD(SDNode *N);
SDValue PromoteFloatRes_FPOWI(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed one-element vector Op which was scalarized to its
/// element type, this returns the element. For example, if Op is a v1i32,
/// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) {
SDValue &ScalarizedOp = ScalarizedVectors[Op];
RemapValue(ScalarizedOp);
assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
return ScalarizedOp;
}
void SetScalarizedVector(SDValue Op, SDValue Result);
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
SDValue ScalarizeVecRes_VSETCC(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed vector Op which was split into vectors of half the size,
/// this method returns the halves. The first elements of Op coincide with the
/// elements of Lo; the remaining elements of Op coincide with the elements of
/// Hi: Op is what you would get by concatenating Lo and Hi.
/// For example, if Op is a v8i32 that was split into two v4i32's, then this
/// method returns the two v4i32's, with Lo corresponding to the first 4
/// elements of Op, and Hi to the last 4 elements.
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed vector Op which was widened into a larger vector, this
/// method returns the larger vector. The elements of the returned vector
/// consist of the elements of Op followed by elements containing rubbish.
/// For example, if Op is a v2i32 that was widened to a v4i32, then this
/// method returns a v4i32 for which the first two elements are the same as
/// those of Op, while the last two elements contain rubbish.
SDValue GetWidenedVector(SDValue Op) {
SDValue &WidenedOp = WidenedVectors[Op];
RemapValue(WidenedOp);
assert(WidenedOp.getNode() && "Operand wasn't widened?");
return WidenedOp;
}
void SetWidenedVector(SDValue Op, SDValue Result);
// Vector Result Widening.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_VSETCC(SDNode* N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Helper function to generate a set of loads to load a vector with a
/// resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// LD: load to widen.
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD);
/// Helper function to generate a set of extension loads to load a vector with
/// a resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// LD: load to widen.
/// ExtType: extension element type.
SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widened vector into
/// non-widened memory.
/// StChain: list of chains for the stores we have generated.
/// ST: store of a widened value.
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Helper function to generate a set of stores to perform a truncating store
/// of a widened vector into non-widened memory.
/// StChain: list of chains for the stores we have generated.
/// ST: store of a widened value.
void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST);
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// When FillWithZeroes is true, the vector will be widened with zeroes.
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
/// Return a mask of vector type MaskVT to replace InMask, adjusting it to
/// ToMaskVT with vector extension or truncation as needed.
SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
/// Get the target mask VT, and widen if needed.
EVT getSETCCWidenedResultTy(SDValue SetCC);
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
// Legalization methods which rely only on the fact that the illegal type is
// split into two, not necessarily identical, types. As such they can be used
// for splitting vectors and expanding integers and floats.
void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
if (Op.getValueType().isVector())
GetSplitVector(Op, Lo, Hi);
else if (Op.getValueType().isInteger())
GetExpandedInteger(Op, Lo, Hi);
else
GetExpandedFloat(Op, Lo, Hi);
}
/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
/// given value.
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
// Legalization methods which rely only on the fact that the illegal type is
// split into two identical types of half the size, and that the Lo/Hi part
// is stored first in memory on little/big-endian machines, followed by the
// Hi/Lo part. As such they can be used for expanding integers and floats.
void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
if (Op.getValueType().isInteger())
GetExpandedInteger(Op, Lo, Hi);
else
GetExpandedFloat(Op, Lo, Hi);
}
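// Illustrative note (values assumed for exposition, not part of the original
// header): expanding an illegal i64 holding 0x0123456789ABCDEF into two i32
// halves yields Lo = 0x89ABCDEF and Hi = 0x01234567; per the comment above,
// a little-endian machine stores Lo at the lower address, while a big-endian
// machine stores Hi there instead.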
/// This function will split the integer \p Op into \p NumElements
/// operations of type \p EltVT and store them in \p Ops.
void IntegerToVector(SDValue Op, unsigned NumElements,
SmallVectorImpl<SDValue> &Ops, EVT EltVT);
// Generic Result Expansion.
void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
};
} // end namespace llvm.
#endif
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 322855)
@@ -1,4086 +1,4119 @@
//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file performs vector type splitting and scalarization for LegalizeTypes.
// Scalarization is the act of changing a computation in an illegal one-element
// vector type to be a computation in its scalar element type. For example,
// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
// as a base case when scalarizing vector arithmetic like <4 x f32>, which
// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
// types.
// Splitting is the act of changing a computation in an invalid vector type to
// be a computation in two vectors of half the size. For example, implementing
// <128 x f32> operations in terms of two <64 x f32> operations.
//
//===----------------------------------------------------------------------===//
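// A minimal sketch of the two transformations described above, using
// hypothetical IR purely for illustration:
//   Scalarize:  %r = fadd <1 x float> %a, %b
//           =>  %r0 = fadd float %a0, %b0        ; re-wrapped as needed
//   Split:      %r = fadd <128 x float> %a, %b
//           =>  %lo = fadd <64 x float> %alo, %blo
//               %hi = fadd <64 x float> %ahi, %bhi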
#include "LegalizeTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
//===----------------------------------------------------------------------===//
// Result Vector Scalarization: <1 x ty> -> ty.
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to scalarize the result of this "
"operator!\n");
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
R = ScalarizeVecRes_VecInregOp(N);
break;
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
case ISD::OR:
case ISD::SDIV:
case ISD::SREM:
case ISD::SUB:
case ISD::UDIV:
case ISD::UREM:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
R = ScalarizeVecRes_BinOp(N);
break;
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
}
// If R is null, the sub-method took care of registering the result.
if (R.getNode())
SetScalarizedVector(SDValue(N, ResNo), R);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = GetScalarizedVector(N->getOperand(2));
return DAG.getNode(N->getOpcode(), SDLoc(N),
Op0.getValueType(), Op0, Op1, Op2);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
return GetScalarizedVector(Op);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BITCAST, SDLoc(N),
NewVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
EVT EltVT = N->getValueType(0).getVectorElementType();
SDValue InOp = N->getOperand(0);
// The BUILD_VECTOR operands may be of wider element types and
// we may need to truncate them back to the requested return type.
if (EltVT.isInteger())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
return InOp;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
N->getValueType(0).getVectorElementType(),
N->getOperand(0), N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
NewVT, Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
// The value to insert may have a wider type than the vector element type,
// so be sure to truncate it to the element type if necessary.
SDValue Op = N->getOperand(1);
EVT EltVT = N->getValueType(0).getVectorElementType();
if (Op.getValueType() != EltVT)
// FIXME: Can this happen for floating point types?
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
return Op;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
SDValue Result = DAG.getLoad(
ISD::UNINDEXED, N->getExtensionType(),
N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
N->getAAInfo());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
EVT DestVT = N->getValueType(0).getVectorElementType();
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
SDLoc DL(N);
// The result needs scalarizing, but it's not a given that the source does.
// This is a workaround for targets where it's impossible to scalarize the
// result of a conversion, because the source type is legal.
// For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
// are widened to v8i8, v4i16, and v2i32, which are legal, because v1i64 is
// legal and was not scalarized.
// See the similar logic in ScalarizeVecRes_VSETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
Op = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
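// Worked example for the workaround above (types assumed for illustration):
// scalarizing the v1i1 result of (truncate v1i64) on AArch64 finds that the
// v1i64 source is legal and was never scalarized, so instead of calling
// GetScalarizedVector the code extracts element 0 of the source with
// EXTRACT_VECTOR_ELT and operates on that scalar.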
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
EVT EltVT = N->getValueType(0).getVectorElementType();
EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
SDValue LHS = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
LHS, DAG.getValueType(ExtVT));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
EVT OpEltVT = OpVT.getVectorElementType();
EVT EltVT = N->getValueType(0).getVectorElementType();
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
Op = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
switch (N->getOpcode()) {
case ISD::ANY_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
case ISD::SIGN_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
}
llvm_unreachable("Illegal extend_vector_inreg opcode");
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
EVT EltVT = N->getValueType(0).getVectorElementType();
SDValue InOp = N->getOperand(0);
if (InOp.getValueType() != EltVT)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
return InOp;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDValue Cond = N->getOperand(0);
EVT OpVT = Cond.getValueType();
SDLoc DL(N);
// The vselect result and true/false operands need scalarizing, but it's
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
// See the similar logic in ScalarizeVecRes_VSETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Cond = GetScalarizedVector(Cond);
} else {
EVT VT = OpVT.getVectorElementType();
Cond = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue LHS = GetScalarizedVector(N->getOperand(1));
TargetLowering::BooleanContent ScalarBool =
TLI.getBooleanContents(false, false);
TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
// If integer and float booleans have different contents then we can't
// reliably optimize in all cases. There is a full explanation for this in
// DAGCombiner::visitSELECT() where the same issue affects folding
// (select C, 0, 1) to (xor C, 1).
if (TLI.getBooleanContents(false, false) !=
TLI.getBooleanContents(false, true)) {
// At least try the common case where the boolean is generated by a
// comparison.
if (Cond->getOpcode() == ISD::SETCC) {
EVT OpVT = Cond->getOperand(0)->getValueType(0);
ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
VecBool = TLI.getBooleanContents(OpVT);
} else
ScalarBool = TargetLowering::UndefinedBooleanContent;
}
if (ScalarBool != VecBool) {
EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
case TargetLowering::UndefinedBooleanContent:
break;
case TargetLowering::ZeroOrOneBooleanContent:
assert(VecBool == TargetLowering::UndefinedBooleanContent ||
VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
// Vector true is all ones; the scalar consumer expects a single 1, so mask.
Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
Cond, DAG.getConstant(1, SDLoc(N), CondVT));
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
assert(VecBool == TargetLowering::UndefinedBooleanContent ||
VecBool == TargetLowering::ZeroOrOneBooleanContent);
// Vector true is a single 1; the scalar consumer expects all ones, so sign extend.
Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
Cond, DAG.getValueType(MVT::i1));
break;
}
}
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), Cond, LHS,
GetScalarizedVector(N->getOperand(2)));
}
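// Worked example for the boolean-contents fixup above (illustrative): if the
// condition came from a vector compare with ZeroOrNegativeOneBooleanContent
// (true == all ones) but the scalar select expects ZeroOrOneBooleanContent,
// the extracted condition -1 is masked via (AND Cond, 1) so true becomes 1;
// in the opposite direction, a vector 1 is turned into all ones by
// SIGN_EXTEND_INREG from i1.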
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS,
GetScalarizedVector(N->getOperand(2)));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(2));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
N->getOperand(0), N->getOperand(1),
LHS, GetScalarizedVector(N->getOperand(3)),
N->getOperand(4));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() ==
N->getOperand(0).getValueType().isVector() &&
"Scalar/Vector type mismatch");
if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
SDLoc DL(N);
// Turn it into a scalar SETCC.
return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
// Figure out if the scalar is the LHS or RHS and return it.
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
return GetScalarizedVector(N->getOperand(Op));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT OpVT = LHS.getValueType();
EVT NVT = N->getValueType(0).getVectorElementType();
SDLoc DL(N);
// The result needs scalarizing, but it's not a given that the source does.
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
LHS = GetScalarizedVector(LHS);
RHS = GetScalarizedVector(RHS);
} else {
EVT VT = OpVT.getVectorElementType();
LHS = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
RHS = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
// Turn it into a scalar SETCC.
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
N->getOperand(2));
// Vectors may have different boolean contents than scalars. Promote the
// value appropriately.
ISD::NodeType ExtendCode =
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
//===----------------------------------------------------------------------===//
// Operand Vector Scalarization <1 x ty> -> ty.
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::TRUNCATE:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
case ISD::EXTRACT_VECTOR_ELT:
Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
break;
case ISD::VSELECT:
Res = ScalarizeVecOp_VSELECT(N);
break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
}
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
/// If the value to convert is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Elt);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
/// Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexpected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
/// so just return the element, ignoring the index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue Res = GetScalarizedVector(N->getOperand(0));
if (Res.getValueType() != VT)
Res = VT.isFloatingPoint()
? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
: DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
return Res;
}
/// If the input condition is a vector that needs to be scalarized, it must be
/// <1 x i1>, so just convert to a normal ISD::SELECT
/// (still with vector output type since that was acceptable if we got here).
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
EVT VT = N->getValueType(0);
return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
N->getOperand(2));
+}
+
+/// If the operand is a vector that needs to be scalarized then the
+/// result must be v1i1, so just convert to a scalar SETCC and wrap
+/// with a scalar_to_vector since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+
+ // Vectors may have different boolean contents than scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
}
/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(N->isUnindexed() && "Indexed store of one-element vector?");
assert(OpNo == 1 && "Do not know how to scalarize this operand!");
SDLoc dl(N);
if (N->isTruncatingStore())
return DAG.getTruncStore(
N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(), N->getAlignment(),
N->getMemOperand()->getFlags(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
/// If the value to round is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
N->getValueType(0).getVectorElementType(), Elt,
N->getOperand(1));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
/// This method is called when the specified result of the specified node is
/// found to need vector splitting. At this point, the node may also have
/// invalid operands or may have other results that need legalization; we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Split node result: ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "SplitVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to split the result of this "
"operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
case ISD::MGATHER:
SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
break;
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
SplitVecRes_ExtendOp(N, Lo, Hi);
break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
case ISD::FPOW:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::UREM:
case ISD::SREM:
case ISD::FREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
if (Lo.getNode())
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op0Lo, Op0Hi;
GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
SDValue Op1Lo, Op1Hi;
GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
SDValue Op2Lo, Op2Hi;
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
Op0Lo, Op1Lo, Op2Lo);
Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
Op0Hi, Op1Hi, Op2Hi);
}
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SDLoc dl(N);
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeWidenVector:
break;
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
// A scalar to vector conversion, where the scalar needs expansion.
// If the vector is being split in two then we can just convert the
// expanded pieces.
if (LoVT == HiVT) {
GetExpandedOp(InOp, Lo, Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
case TargetLowering::TypeSplitVector:
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
// In the general case, convert the input to an integer and split it by hand.
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
if (DAG.getDataLayout().isBigEndian())
std::swap(LoIntVT, HiIntVT);
SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
Lo = DAG.getBuildVector(LoVT, dl, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
Hi = DAG.getBuildVector(HiVT, dl, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
SDLoc dl(N);
unsigned NumSubvectors = N->getNumOperands() / 2;
if (NumSubvectors == 1) {
Lo = N->getOperand(0);
Hi = N->getOperand(1);
return;
}
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);
EVT VecVT = Vec.getValueType();
unsigned VecElems = VecVT.getVectorNumElements();
unsigned SubElems = SubVec.getValueType().getVectorNumElements();
// If we know the index is 0, and we know the subvector doesn't cross the
// boundary between the halves, we can avoid spilling the vector, and insert
// into the lower half of the split vector directly.
// TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
// the index is constant and there is no boundary crossing. But those cases
// don't seem to get hit in practice.
if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = ConstIdx->getZExtValue();
if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
return;
}
}
// Spill the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new subvector into the specified index.
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
// Load the Lo part from the stack slot.
Lo =
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr =
DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
MinAlign(Alignment, IncrementSize));
}
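// Illustrative summary of the spill path above (types assumed): for
// (insert_subvector v8i32, v2i32, idx) with a non-zero or boundary-crossing
// idx, the v8i32 is stored to a stack temporary, the v2i32 is stored over it
// at the element offset for idx, and the two v4i32 halves are then re-loaded
// from the slot as Lo and Hi.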
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc DL(N);
SDValue RHSLo, RHSHi;
SDValue RHS = N->getOperand(1);
EVT RHSVT = RHS.getValueType();
if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
GetSplitVector(RHS, RHSLo, RHSHi);
else
std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
}
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc dl(N);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) =
DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
DAG.getValueType(LoVT));
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
DAG.getValueType(HiVT));
}
void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDLoc dl(N);
SDValue InLo, InHi;
if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(N0, InLo, InHi);
else
std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
EVT InLoVT = InLo.getValueType();
unsigned InNumElements = InLoVT.getVectorNumElements();
EVT OutLoVT, OutHiVT;
std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned OutNumElements = OutLoVT.getVectorNumElements();
assert((2 * OutNumElements) <= InNumElements &&
"Illegal extend vector in reg split");
// *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
// input vector (i.e. we only use InLo):
// OutLo will extend the first OutNumElements from InLo.
// OutHi will extend the next OutNumElements from InLo.
// Shuffle the elements from InLo for OutHi into the bottom elements to
// create a 'fake' InHi.
SmallVector<int, 8> SplitHi(InNumElements, -1);
for (unsigned i = 0; i != OutNumElements; ++i)
SplitHi[i] = i + OutNumElements;
InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
}
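// Worked example for the 'fake InHi' shuffle above (types assumed for
// illustration): splitting (zero_extend_vector_inreg v16i8) with a v8i16
// result into two v4i16 halves splits the input into InLo = v8i8, so
// InNumElements = 8 and OutNumElements = 4. SplitHi then becomes
// {4, 5, 6, 7, -1, -1, -1, -1}, moving bytes 4..7 of InLo into the bottom
// lanes so the second extend sees them as its lowest elements.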
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Elt = N->getOperand(1);
SDValue Idx = N->getOperand(2);
SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);
if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = CIdx->getZExtValue();
unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoNumElts)
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
else
Hi =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
DAG.getConstant(IdxVal - LoNumElts, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
return;
}
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(0), true))
return;
// Spill the vector to the stack.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store =
DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
// Load the Lo part from the stack slot.
Lo =
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
Hi = DAG.getUNDEF(HiVT);
}
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(LD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
Alignment, MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(LD, 1), Ch);
}
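// Illustrative note on the chain handling above: the TokenFactor joins the
// chains of the Lo and Hi loads, so anything that depended on the original
// load's chain result now waits for both half-loads; ReplaceValueWith then
// redirects those users to the new combined chain.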
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(MLD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
SDValue Src0 = MLD->getSrc0();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
// If Alignment is equal to the vector size, take half of it for the
// second part.
unsigned SecondHalfAlignment =
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
ExtType, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MLD, 1), Ch);
}
void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(MGT);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
SDValue Index = MGT->getIndex();
unsigned Alignment = MGT->getOriginalAlignment();
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
}
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
EVT LoVT, HiVT;
SDLoc DL(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the input.
SDValue LL, LH, RL, RH;
std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
}
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// Get the dest types - they may not match the input types, e.g. int_to_fp.
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
else
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
}
}
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT SrcVT = N->getOperand(0).getValueType();
EVT DestVT = N->getValueType(0);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
// We can do better than a generic split operation if the extend is doing
// more than just doubling the width of the elements and the following are
// true:
// - The number of vector elements is even,
// - the source type is legal,
// - the type of a split source is illegal,
// - the type of an extended (by doubling element size) source is legal, and
// - the type of that extended source when split is legal.
//
// This won't necessarily completely legalize the operation, but it will
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
unsigned NumElements = SrcVT.getVectorNumElements();
if ((NumElements & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
EVT SplitLoVT, SplitHiVT;
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
DEBUG(dbgs() << "Split vector extend via incremental extend:";
N->dump(&DAG); dbgs() << "\n");
// Extend the source vector by one step.
SDValue NewSrc =
DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
// Get the low and high halves of the new, extended one step, vector.
std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
// Extend those vector halves the rest of the way.
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
return;
}
}
// Fall back to the generic unary operator splitting otherwise.
SplitVecRes_UnaryOp(N, Lo, Hi);
}
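// Worked example for the incremental extend above (target legality assumed
// for illustration, e.g. an AVX2-like target): splitting
// (sign_extend v16i8 to v16i32) where v16i8 and v16i16 are legal but v8i8 is
// not first extends one step to v16i16, splits that into two v8i16 halves,
// and then sign extends each half to v8i32 -- rather than splitting the
// legal v16i8 source directly and falling toward scalarization.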
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
SDValue Inputs[4];
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
EVT NewVT = Inputs[0].getValueType();
unsigned NewElts = NewVT.getVectorNumElements();
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
SmallVector<int, 16> Ops;
for (unsigned High = 0; High < 2; ++High) {
SDValue &Output = High ? Hi : Lo;
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands (recorded in InputUsed).
// If building a suitable shuffle vector proves too hard, then bail
// out with useBuildVector set.
unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
unsigned FirstMaskIdx = High * NewElts;
bool useBuildVector = false;
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
// The mask element. This indexes into the input.
int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element does not index into any input vector.
Ops.push_back(-1);
continue;
}
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NewElts;
// Find or create a shuffle vector operand to hold this input.
unsigned OpNo;
for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input) {
// This input vector is already an operand.
break;
} else if (InputUsed[OpNo] == -1U) {
// Create a new operand for this input vector.
InputUsed[OpNo] = Input;
break;
}
}
if (OpNo >= array_lengthof(InputUsed)) {
// More than two input vectors used! Give up on trying to create a
// shuffle vector. Insert all elements into a BUILD_VECTOR instead.
useBuildVector = true;
break;
}
// Add the mask index for the new shuffle vector.
Ops.push_back(Idx + OpNo * NewElts);
}
if (useBuildVector) {
EVT EltVT = NewVT.getVectorElementType();
SmallVector<SDValue, 16> SVOps;
// Extract the input elements by hand.
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
// The mask element. This indexes into the input.
int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element is "undef" or indexes off the end of the input.
SVOps.push_back(DAG.getUNDEF(EltVT));
continue;
}
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NewElts;
// Extract the vector element by hand.
SVOps.push_back(DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
Output = DAG.getBuildVector(NewVT, dl, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
} else {
SDValue Op0 = Inputs[InputUsed[0]];
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = InputUsed[1] == -1U ?
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
// At least one input vector was used. Create a new shuffle vector.
Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
}
Ops.clear();
}
}
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
/// This method is called when the specified operand of the specified node is
/// found to need vector splitting. At this point, all of the result types of
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Split node operand: ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom split this node.
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to split this operator's "
"operand!\n");
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
case ISD::MSCATTER:
Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
break;
case ISD::MGATHER:
Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
case ISD::FCANONICALIZE:
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = SplitVecOp_ExtVecInRegOp(N);
break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
Res = SplitVecOp_VECREDUCE(N, OpNo);
break;
}
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
// The only possibility for an illegal operand is the mask, since result type
// legalization would have handled this node already otherwise.
assert(OpNo == 0 && "Illegal operand must be mask");
SDValue Mask = N->getOperand(0);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
EVT Src0VT = Src0.getValueType();
SDLoc DL(N);
assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
assert(Lo.getValueType() == Hi.getValueType() &&
"Lo and Hi have differing types");
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
SDValue LoSelect =
DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
SDValue HiSelect =
DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
}
SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
SDValue VecOp = N->getOperand(OpNo);
EVT VecVT = VecOp.getValueType();
assert(VecVT.isVector() && "Can only split reduce vector operand");
GetSplitVector(VecOp, Lo, Hi);
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
bool NoNaN = N->getFlags().hasNoNaNs();
unsigned CombineOpc = 0;
switch (N->getOpcode()) {
case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX:
CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
break;
case ISD::VECREDUCE_FMIN:
CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
break;
default:
llvm_unreachable("Unexpected reduce ISD node");
}
// Combine the split subvectors element-wise with the matching vector binary
// op, then reduce the now partially reduced, half-length vector.
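// Illustrative trace (hypothetical types; v8i32 split into two v4i32 halves):
//   t0: i32 = vecreduce_add t1 (v8i32)
// becomes
//   t2: v4i32 = add Lo, Hi
//   t3: i32 = vecreduce_add t2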
SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
}
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
// For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
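// Illustrative trace (hypothetical types): for i64 = bitcast v4i16, with
// v4i16 split into two v2i16 halves:
//   LoI: i32 = bitcast Lo;  HiI: i32 = bitcast Hi
//   res: i64 = JoinIntegers(LoI, HiI)  // halves swapped first on big-endian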
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal < LoElts) {
assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
DAG.getConstant(IdxVal - LoElts, dl,
Idx.getValueType()));
}
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
EVT VecVT = Vec.getValueType();
if (isa<ConstantSDNode>(Idx)) {
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoElts)
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts, SDLoc(N),
Idx.getValueType())), 0);
}
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(0), true))
return SDValue();
// Make the vector elements byte-addressable if they aren't already.
SDLoc dl(N);
EVT EltVT = VecVT.getVectorElementType();
if (EltVT.getSizeInBits() < 8) {
SmallVector<SDValue, 4> ElementOps;
for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
ElementOps.push_back(DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
DAG.getConstant(i, dl, MVT::i8)),
dl, MVT::i8));
}
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
}
// Store the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo(), EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
SDValue Lo, Hi;
// *_EXTEND_VECTOR_INREG nodes only reference the lower half of the input, so
// splitting the result has the same effect as splitting the input operand.
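// Illustrative trace (hypothetical types):
//   t0: v4i32 = zero_extend_vector_inreg t1 (v16i8)
// reads only bytes 0..3 of t1, so the two v2i32 halves produced by
// SplitVecRes_ExtVecInRegOp are both derived from lo(t1) and can simply
// be concatenated back into the legal v4i32 result.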
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
unsigned OpNo) {
EVT LoVT, HiVT;
SDLoc dl(MGT);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(),
MGT->getRanges());
SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
Hi);
ReplaceValueWith(SDValue(MGT, 0), Res);
return SDValue();
}
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
GetSplitVector(Data, DataLo, DataHi);
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
// If the alignment equals the size of the whole vector in bytes, only half
// of that alignment can be assumed for the second half of the store.
unsigned SecondHalfAlignment =
(Alignment == Data->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
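// Illustrative example (hypothetical types): storing a v16i32 (64 bytes)
// with 64-byte alignment leaves only 32-byte alignment for the second
// half; with 16-byte alignment, both halves keep the 16-byte alignment.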
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
N->isTruncatingStore(),
N->isCompressingStore());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore(), N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Index = N->getIndex();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
// Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
GetSplitVector(Data, DataLo, DataHi);
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
// Build a factor node to remember that this store is independent of the
// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
SDLoc DL(N);
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
Alignment, MMOFlags, AAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, Alignment, MMOFlags, AAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
SDLoc DL(N);
// The input operands all must have the same type, and we know the result
// type is legal. Convert this to a BUILD_VECTOR which extracts all the
// input elements.
// TODO: If the input elements are power-two vectors, we could convert this to
// a new CONCAT_VECTORS node with elements that are half-wide.
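// Illustrative trace (hypothetical types, with v4i64 assumed illegal):
//   t0: v8i64 = concat_vectors t1, t2 (each v4i64)
// becomes
//   t3: v8i64 = BUILD_VECTOR t1[0], t1[1], t1[2], t1[3],
//                            t2[0], t2[1], t2[2], t2[3]
// where each element is an EXTRACT_VECTOR_ELT of the corresponding input.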
SmallVector<SDValue, 32> Elts;
EVT EltVT = N->getValueType(0).getVectorElementType();
for (const SDValue &Op : N->op_values()) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
Elts.push_back(DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
}
return DAG.getBuildVector(N->getValueType(0), DL, Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
// we can try to get more clever with power-two vectors. Specifically,
// split the input type, but also widen the result element size, then
// concatenate the halves and truncate again. For example, consider a target
// where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
// vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
// %inlo = v4i32 extract_subvector %in, 0
// %inhi = v4i32 extract_subvector %in, 4
// %lo16 = v4i16 trunc v4i32 %inlo
// %hi16 = v4i16 trunc v4i32 %inhi
// %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
// %res = v8i8 trunc v8i16 %in16
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
SDValue InVec = N->getOperand(0);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
bool IsFloat = OutVT.isFloatingPoint();
// Widening should have already made sure this is a power-two vector
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
// If the input elements are no more than twice the width of the result
// elements, just use the normal splitting. Our trick only works if there's
// room to split more than once.
if (InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
// Extract the halves of the input via extract_subvector.
SDValue InLoVec, InHiVec;
std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
HalfHi);
// Now finish up by truncating all the way down to the original result
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
0, DL, TLI.getPointerTy(DAG.getDataLayout())))
: DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
// The result has a legal vector type, but the input needs splitting.
SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo0, Hi0);
GetSplitVector(N->getOperand(1), Lo1, Hi1);
unsigned PartElements = Lo0.getValueType().getVectorNumElements();
EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
return PromoteTargetBoolean(Con, N->getValueType(0));
}
SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.
return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
}
//===----------------------------------------------------------------------===//
// Result Vector Widening
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
return;
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "WidenVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER:
Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
case ISD::ADD:
case ISD::AND:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
case ISD::FDIV:
case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
Res = WidenVecRes_BinaryCanTrap(N);
break;
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
Res = WidenVecRes_Shift(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
break;
case ISD::ANY_EXTEND:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
break;
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
Res = WidenVecRes_Ternary(N);
break;
}
// If Res is null, the sub-method took care of registering the result.
if (Res.getNode())
SetWidenedVector(SDValue(N, ResNo), Res);
}
SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
// Ternary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
SDValue InOp3 = GetWidenedVector(N->getOperand(2));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
int Idx = 0; // Current Idx into input vectors.
// NumElts := greatest legal vector size (at most WidenVT)
// while (orig. vector has unhandled elements) {
// take munches of size NumElts from the beginning and add to ConcatOps
// NumElts := next smaller supported vector size or 1
// }
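// Illustrative trace (hypothetical types; assume v4i32 and v2i32 are legal,
// v8i32 is not, and SDIV can trap): widening t0: v7i32 = sdiv t1, t2 must
// not operate on all of v8i32, since the padded lane could fault (e.g.
// divide by zero). The seven original lanes are covered exactly:
//   ConcatOps[0]: v4i32 = sdiv lanes 0..3
//   ConcatOps[1]: v2i32 = sdiv lanes 4..5
//   ConcatOps[2]: i32   = sdiv lane 6
// The pieces are then rebuilt (scalar -> v2i32 -> v4i32) and concatenated,
// with undef padding, into the widened v8i32 result.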
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
} while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
}
// Check to see if we have a single operation with the widen type.
if (ConcatEnd == 1) {
VT = ConcatOps[0].getValueType();
if (VT == WidenVT)
return ConcatOps[0];
}
// while (Some element of ConcatOps is not of type MaxVT) {
// From the end of ConcatOps, collect elements of the same type and put
// them into an op of the next larger supported type
// }
while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
Idx--;
int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
EVT NextVT;
do {
NextSize *= 2;
NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
} while (!TLI.isTypeLegal(NextVT));
if (!VT.isVector()) {
// Scalar type: collect the scalars into a vector of type NextVT using
// INSERT_VECTOR_ELT nodes.
SDValue VecOp = DAG.getUNDEF(NextVT);
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
VecOp = DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
} else {
// Vector type, create a CONCAT_VECTORS of type NextVT
SDValue undefVec = DAG.getUNDEF(VT);
unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
unsigned RealVals = ConcatEnd - Idx - 1;
unsigned SubConcatEnd = 0;
unsigned SubConcatIdx = Idx + 1;
while (SubConcatEnd < RealVals)
SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
while (SubConcatEnd < OpsToConcat)
SubConcatOps[SubConcatEnd++] = undefVec;
ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
NextVT, SubConcatOps);
ConcatEnd = SubConcatIdx + 1;
}
}
// Check to see if we have a single operation with the widen type.
if (ConcatEnd == 1) {
VT = ConcatOps[0].getValueType();
if (VT == WidenVT)
return ConcatOps[0];
}
// Add undefs of size MaxVT until ConcatOps reaches the length of WidenVT.
unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd) {
SDValue UndefVal = DAG.getUNDEF(MaxVT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
makeArrayRef(ConcatOps.data(), NumOps));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
// If both input and result vector types are of the same width, extend
// operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
// accepts fewer elements in the result than in the input.
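// Illustrative trace (hypothetical SSE-like target): for
//   t0: v2i32 = sign_extend t1 (v2i8)
// the result widens to v4i32 and the input widens to v16i8, both 128 bits
// wide, so this becomes
//   t2: v4i32 = sign_extend_vector_inreg t1' (v16i8)
// which extends the low four bytes; only the first two lanes carry real
// data, which is fine for a widened result.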
if (Opcode == ISD::SIGN_EXTEND)
return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
if (Opcode == ISD::ZERO_EXTEND)
return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
}
}
if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
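// Illustrative trace (hypothetical AVX-like target where v4f64 is legal):
//   t0: v2i32 = fp_to_sint t1 (v2f64)
// has WidenVT = v4i32 and InWidenVT = v4f64, so the input is padded
//   t2: v4f64 = concat_vectors t1, undef
// and the conversion runs once on the widened vector:
//   t3: v4i32 = fp_to_sint t2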
if (WidenNumElts % InVTNumElts == 0) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = InOp;
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
SDValue InVal = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Extract a subvector and convert that shortened input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
// Otherwise unroll into some nasty scalar code and rebuild the vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
for (i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenSVT = WidenVT.getVectorElementType();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
EVT InSVT = InVT.getVectorElementType();
unsigned InVTNumElts = InVT.getVectorNumElements();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
case ISD::SIGN_EXTEND_VECTOR_INREG:
return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
}
}
}
// Unroll, extend the scalars and rebuild the vector.
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
break;
case ISD::SIGN_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
break;
case ISD::ZERO_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
break;
default:
llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
}
Ops.push_back(Val);
}
while (Ops.size() != WidenNumElts)
Ops.push_back(DAG.getUNDEF(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
// If this is an FCOPYSIGN with same input types, we can treat it as a
// normal (can trap) binary op.
if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
return WidenVecRes_BinaryCanTrap(N);
// If the types are different, fall back to unrolling.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
EVT ShVT = ShOp.getValueType();
if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
ShVT.getVectorElementType(),
WidenVT.getVectorNumElements());
if (ShVT != ShWidenVT)
ShOp = ModifyToType(ShOp, ShWidenVT);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
cast<VTSDNode>(N->getOperand(1))->getVT()
.getVectorElementType(),
WidenVT.getVectorNumElements());
SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N),
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
return GetWidenedVector(WidenVec);
}
SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
// must perform the conversion using a stack slot.
if (InVT.isVector())
break;
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeSplitVector:
break;
case TargetLowering::TypeWidenVector:
// If the InOp is widened to the same size, convert it. Otherwise, fall
// out of the switch and widen the widened input.
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widened value.
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine the new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
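// Illustrative trace (hypothetical SSE-like target): widening
//   t0: v2i16 = bitcast t1 (i32)
// to WidenVT = v8i16 (128 bits) gives NewInVT = v4i32 and NewNumElts = 4:
//   t2: v4i32 = BUILD_VECTOR t1, undef, undef, undef
//   t3: v8i16 = bitcast t2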
EVT NewInVT;
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
SmallVector<SDValue, 16> Ops(NewNumElts);
SDValue UndefVal = DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i < NewNumElts; ++i)
Ops[i] = UndefVal;
SDValue NewVec;
if (InVT.isVector())
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
else
NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
return CreateStackStoreLoad(InOp, WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SDLoc dl(N);
// Build a widened vector, filling the new elements with UNDEF.
EVT VT = N->getValueType(0);
// Integer BUILD_VECTOR operands may be larger than the node's vector element
// type. The UNDEFs need to have the same type as the existing operands.
EVT EltVT = N->getOperand(0).getValueType();
unsigned NumElts = VT.getVectorNumElements();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
return DAG.getBuildVector(WidenVT, dl, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
EVT InVT = N->getOperand(0).getValueType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
// Add undef vectors to widen to the correct length.
unsigned NumConcat = WidenVT.getVectorNumElements() /
InVT.getVectorNumElements();
SDValue UndefVal = DAG.getUNDEF(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
for (unsigned i = NumOperands; i != NumConcat; ++i)
Ops[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
}
} else {
InputWidened = true;
if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
// The inputs and the result widen to the same type.
unsigned i;
for (i=1; i < NumOperands; ++i)
if (!N->getOperand(i).isUndef())
break;
if (i == NumOperands)
// Everything but the first operand is an UNDEF so just return the
// widened first operand.
return GetWidenedVector(N->getOperand(0));
if (NumOperands == 2) {
// Replace concat of two operands with a shuffle.
SmallVector<int, 16> MaskOps(WidenNumElts, -1);
for (unsigned i = 0; i < NumInElts; ++i) {
MaskOps[i] = i;
MaskOps[i + NumInElts] = i + WidenNumElts;
}
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)),
MaskOps);
}
}
}
// Fall back to using extracts and a BUILD_VECTOR.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Idx = 0;
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
// Check if we can just return the input vector after widening.
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
// Check if we can extract from the vector.
unsigned InNumElts = InVT.getVectorNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i=0; i < NumElts; ++i)
Ops[i] =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(IdxVal + i, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
InOp.getValueType(), InOp,
N->getOperand(1), N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
if (ExtType != ISD::NON_EXTLOAD)
Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
else
Result = GenWidenVectorLoads(LdChain, LD);
// If we generate a single load, we can use that for the chain. Otherwise,
// build a factor node to remember the multiple loads are independent and
// chain to that.
SDValue NewChain;
if (LdChain.size() == 1)
NewChain = LdChain[0];
else
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(N, 1), NewChain);
return Result;
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getSrc0());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
EVT BoolVT = getSetCCResultType(WidenVT);
// We can't use ModifyToType() here because the extra mask lanes must be
// filled with zeroes rather than undef.
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType,
N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
SDValue Src0 = GetWidenedVector(N->getValue());
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well
Mask = WidenTargetBoolean(Mask, WideVT, true);
// Widen the Index operand
SDValue Index = N->getIndex();
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
WidenVT, N->getOperand(0));
}
// Return true if this is a node that could have two SETCCs as operands.
static inline bool isLogicalMaskOp(unsigned Opcode) {
switch (Opcode) {
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return true;
}
return false;
}
// This is used just for the assert in convertMask(). Check that this is
// either a SETCC or a SETCC already handled by convertMask().
#ifndef NDEBUG
static inline bool isSETCCorConvertedSETCC(SDValue N) {
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
N = N.getOperand(0);
else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
for (unsigned i = 1; i < N->getNumOperands(); ++i)
if (!N->getOperand(i)->isUndef())
return false;
N = N.getOperand(0);
}
if (N.getOpcode() == ISD::TRUNCATE)
N = N.getOperand(0);
else if (N.getOpcode() == ISD::SIGN_EXTEND)
N = N.getOperand(0);
if (isLogicalMaskOp(N.getOpcode()))
return isSETCCorConvertedSETCC(N.getOperand(0)) &&
isSETCCorConvertedSETCC(N.getOperand(1));
return (N.getOpcode() == ISD::SETCC ||
ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
// Return a mask of vector type MaskVT to replace InMask. Also adjust the new
// mask to ToMaskVT if needed with a vector extension or truncation.
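// Illustrative trace (hypothetical types): with MaskVT = v4i16 and
// ToMaskVT = v8i32, the rebuilt mask is sign-extended element-wise and
// then padded with undef up to the target element count:
//   t0: v4i16 = setcc ...
//   t1: v4i32 = sign_extend t0
//   t2: v8i32 = concat_vectors t1, undef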
SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
EVT ToMaskVT) {
// Currently only a SETCC, or an AND/OR/XOR of two SETCCs, is handled.
// FIXME: This code seems to be too restrictive, we might consider
// generalizing it or dropping it.
assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
Ops.push_back(InMask->getOperand(i));
SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
LLVMContext &Ctx = *DAG.getContext();
unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
if (MaskScalarBits < ToMaskScalBits) {
EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
MaskVT.getVectorNumElements());
Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
} else if (MaskScalarBits > ToMaskScalBits) {
EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
MaskVT.getVectorNumElements());
Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
}
assert(Mask->getValueType(0).getScalarSizeInBits() ==
ToMaskVT.getScalarSizeInBits() &&
"Mask should have the right element size by now.");
// Adjust Mask to the right number of elements.
unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
ZeroIdx);
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
EVT SubVT = Mask->getValueType(0);
SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
SubConcatOps[0] = Mask;
for (unsigned i = 1; i < NumSubVecs; ++i)
SubConcatOps[i] = DAG.getUNDEF(SubVT);
Mask =
DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
}
assert((Mask->getValueType(0) == ToMaskVT) &&
"A mask of ToMaskVT should have been produced by now.");
return Mask;
}
// Get the target mask VT, and widen if needed.
EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
assert(SetCC->getOpcode() == ISD::SETCC);
LLVMContext &Ctx = *DAG.getContext();
EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
return MaskVT;
}
// This method tries to handle VSELECT and its mask by legalizing operands
// (which may require widening) and if needed adjusting the mask vector type
// to match that of the VSELECT. Without it, many cases end up with
// scalarization of the SETCC, with many unnecessary instructions.
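// Illustrative trace (hypothetical types; assume v2f32/v2i32 widen to
// v4f32/v4i32 and compares produce integer-element masks):
//   t0: v2f32 = vselect (setcc v2i32 a, b), t1, t2
// widens to
//   m:  v4i32 = setcc a, b           // mask rebuilt at the widened VT
//   t3: v4f32 = vselect m, t1', t2'  // t1', t2' are the widened operands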
SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
LLVMContext &Ctx = *DAG.getContext();
SDValue Cond = N->getOperand(0);
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
return SDValue();
// If this is a split VSELECT that was previously already handled, do
// nothing.
if (Cond->getValueType(0).getScalarSizeInBits() != 1)
return SDValue();
EVT VSelVT = N->getValueType(0);
// Only handle vector types whose total size in bits is a power of 2.
if (!isPowerOf2_64(VSelVT.getSizeInBits()))
return SDValue();
// Don't touch if this will be scalarized.
EVT FinalVT = VSelVT;
while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
if (FinalVT.getVectorNumElements() == 1)
return SDValue();
// If there is support for an i1 vector mask, don't touch.
if (Cond.getOpcode() == ISD::SETCC) {
EVT SetCCOpVT = Cond->getOperand(0).getValueType();
while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
if (SetCCResVT.getScalarSizeInBits() == 1)
return SDValue();
}
// Get the VT and operands for VSELECT, and widen if needed.
SDValue VSelOp1 = N->getOperand(1);
SDValue VSelOp2 = N->getOperand(2);
if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
VSelOp1 = GetWidenedVector(VSelOp1);
VSelOp2 = GetWidenedVector(VSelOp2);
}
// The mask of the VSELECT should have integer elements.
EVT ToMaskVT = VSelVT;
if (!ToMaskVT.getScalarType().isInteger())
ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
SDValue Mask;
if (Cond->getOpcode() == ISD::SETCC) {
EVT MaskVT = getSETCCWidenedResultTy(Cond);
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
Cond->getOperand(0).getOpcode() == ISD::SETCC &&
Cond->getOperand(1).getOpcode() == ISD::SETCC) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
EVT VT0 = getSETCCWidenedResultTy(SETCC0);
EVT VT1 = getSETCCWidenedResultTy(SETCC1);
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
EVT MaskVT;
// If the two SETCCs have different VTs, either extend/truncate one of
// them to the other "towards" ToMaskVT, or truncate one and extend the
// other to ToMaskVT.
if (ScalarBits0 != ScalarBits1) {
EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
MaskVT = WideVT;
else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
MaskVT = NarrowVT;
else
MaskVT = ToMaskVT;
} else
// If the two SETCCs have the same VT, don't change it.
MaskVT = VT0;
// Make new SETCCs and logical nodes.
SETCC0 = convertMask(SETCC0, VT0, MaskVT);
SETCC1 = convertMask(SETCC1, VT1, MaskVT);
Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
// Convert the logical op for VSELECT if needed.
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else
return SDValue();
return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
}
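// A worked example (illustrative sketch, assuming a target whose vector
// SETCC produces integer-element masks rather than i1 vectors): for
//   vselect (setcc a, b : v4i32), x, y : v4i16
// the natural mask type v4i32 does not match the v4i16 VSELECT type, so the
// SETCC is rebuilt at v4i32 and truncated by convertMask to v4i16, letting
// the VSELECT survive legalization instead of being scalarized.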
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
if (SDValue Res = WidenVSELECTAndMask(N))
return Res;
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
// If we have to split the condition, there is no point in widening the
// select. This would result in a cycle of widening the select ->
// widening the condition operand -> splitting the condition operand ->
// splitting the select -> widening the select. Instead, split this select
// further and widen the resulting type.
if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
SDValue Res = ModifyToType(SplitSelect, WidenVT);
return Res;
}
if (Cond1.getValueType() != CondWidenVT)
Cond1 = ModifyToType(Cond1, CondWidenVT);
}
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
return DAG.getNode(N->getOpcode(), SDLoc(N),
WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(2));
SDValue InOp2 = GetWidenedVector(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
InOp1.getValueType(), N->getOperand(0),
N->getOperand(1), InOp1, InOp2, N->getOperand(4));
}
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() ==
N->getOperand(0).getValueType().isVector() &&
"Scalar/Vector type mismatch");
if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
InOp1, InOp2, N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getUNDEF(WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned NumElts = VT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Adjust mask based on new input vector length.
SmallVector<int, 16> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = N->getMaskElt(i);
if (Idx < (int)NumElts)
NewMask.push_back(Idx);
else
NewMask.push_back(Idx - NumElts + WidenNumElts);
}
for (unsigned i = NumElts; i != WidenNumElts; ++i)
NewMask.push_back(-1);
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
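// A worked example (illustrative sketch): widening a v2i32 shuffle to v4i32.
// With NumElts = 2 and WidenNumElts = 4, a mask element 0 is kept as-is (it
// indexes the first input), a mask element 3 indexes the second input and is
// rebased to 3 - 2 + 4 = 5, and the tail is padded with -1 (undef):
//   v2i32 mask <0, 3>  -->  v4i32 mask <0, 5, -1, -1>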
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "cannot widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
// The input and output types often differ here, and it could be that while
// we'd prefer to widen the result type, the input operands have been split.
// In this case, we also need to split the result of this node as well.
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
return Res;
}
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Assume that the input and output will be widened appropriately. If not,
// we will have to unroll it at some point.
assert(InOp1.getValueType() == WidenInVT &&
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
(void)WidenInVT;
return DAG.getNode(ISD::SETCC, SDLoc(N),
WidenVT, InOp1, InOp2, N->getOperand(2));
}
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
Res = WidenVecOp_EXTEND(N);
break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
}
// If Res is null, the sub-method took care of registering the result.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue InOp = N->getOperand(0);
// If some legalization strategy other than widening is used on the operand,
// we can't safely assume that just extending the low lanes is the correct
// transformation.
if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
return WidenVecOp_Convert(N);
InOp = GetWidenedVector(InOp);
assert(VT.getVectorNumElements() <
InOp.getValueType().getVectorNumElements() &&
"Input wasn't widened!");
// We may need to further widen the operand until it has the same total
// vector size as the result.
EVT InVT = InOp.getValueType();
if (InVT.getSizeInBits() != VT.getSizeInBits()) {
EVT InEltVT = InVT.getVectorElementType();
for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE;
i < e; ++i) {
EVT FixedVT = (MVT::SimpleValueType)i;
EVT FixedEltVT = FixedVT.getVectorElementType();
if (TLI.isTypeLegal(FixedVT) &&
FixedVT.getSizeInBits() == VT.getSizeInBits() &&
FixedEltVT == InEltVT) {
assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
"Not enough elements in the fixed type for the operand!");
assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
"We can't have the same type as we started with!");
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
InOp = DAG.getNode(
ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
else
InOp = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
break;
}
}
InVT = InOp.getValueType();
if (InVT.getSizeInBits() != VT.getSizeInBits())
// We couldn't find a legal vector type that was a widening of the input
// and could be extended in-register to the result type, so we have to
// scalarize.
return WidenVecOp_Convert(N);
}
// Use special DAG nodes to represent the operation of extending the
// low lanes.
switch (N->getOpcode()) {
default:
llvm_unreachable("Extend legalization on on extend operation!");
case ISD::ANY_EXTEND:
return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
case ISD::SIGN_EXTEND:
return DAG.getSignExtendVectorInReg(InOp, DL, VT);
case ISD::ZERO_EXTEND:
return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
}
}
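// A worked example (illustrative sketch): zero-extending v2i32 -> v2i64
// where the operand was widened to v4i32. The widened input already has the
// result's total size (128 bits), so the fix-up loop is skipped and a single
// zero_extend_vector_inreg node extends just the two low lanes, ignoring the
// widening garbage in the high lanes.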
SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
// the second input be widened as needed later.
return DAG.UnrollVectorOp(N);
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely that we
// can fix the input to a legal type so unroll the convert into some scalar
// code and create a nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
unsigned Opcode = N->getOpcode();
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
Opcode, dl, EltVT,
DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
SDLoc dl(N);
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
return CreateStackStoreLoad(InOp, VT);
}
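// A worked example (illustrative sketch): bitcasting v2i32 -> i64 when the
// operand was widened to v4i32. InWidenSize = 128 is divisible by the 64-bit
// result, so if v2i64 is legal on the target the input is bitcast to v2i64
// and element 0 is extracted, avoiding the stack store/load fallback.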
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
// If the input vector is not legal, it is likely that we will not find a
// legal vector of the same size. Replace the concatenate vector with a
// nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
N->getValueType(0), InOp, N->getOperand(1));
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
N->getValueType(0), InOp, N->getOperand(1));
}
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// We have to widen the value, but we want only to store the original
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
GenWidenVectorTruncStores(StChain, ST);
else
GenWidenVectorStores(StChain, ST);
if (StChain.size() == 1)
return StChain[0];
else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
// Widen the value
SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
// The mask should be widened as well.
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
// We can't use ModifyToType() because we should fill the mask with
// zeroes.
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
assert(Mask.getValueType().getVectorNumElements() ==
WideVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false, MST->isCompressingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "Can widen only data operand of mscatter");
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
EVT WideVT = WideVal.getValueType();
unsigned NumElts = WideVal.getValueType().getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well.
Mask = WidenTargetBoolean(Mask, WideVT, true);
// Widen index.
SDValue Index = MSC->getIndex();
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), dl, Ops,
MSC->getMemOperand());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDLoc dl(N);
// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real
// concern? Should we zero the unused lanes if this is a float compare?
// Get a new SETCC node to compare the newly widened operands.
// Only some of the compared elements are legal.
EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
InOp0.getValueType());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
SVT, InOp0, InOp1, N->getOperand(2));
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
SDValue CC = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
return PromoteTargetBoolean(CC, N->getValueType(0));
}
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
// Utility function to find the type to chop up a widened vector for
// load/store.
// TLI: Target lowering used to determine legal types.
// Width: Width left to load/store (in bits).
// WidenVT: The widened vector type to load to / store from.
// Align: If 0, don't allow use of a wider type.
// WidenEx: If Align is not 0, the additional amount we may load/store.
static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned Width, EVT WidenVT,
unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
unsigned WidenWidth = WidenVT.getSizeInBits();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
// If we have one element to load/store, return it.
EVT RetVT = WidenEltVT;
if (Width == WidenEltWidth)
return RetVT;
// See if there is larger legal integer than the element type to load/store.
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
EVT MemVT((MVT::SimpleValueType) VT);
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
break;
}
}
// See if there is a larger vector type to load/store that has the same vector
// element type and whose size evenly divides the size of WidenVT.
for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
return MemVT;
}
}
return RetVT;
}
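// A worked example (illustrative sketch; the answers depend on the target's
// legal types): FindMemType(Width = 96, WidenVT = v4i32) on a typical 64-bit
// target. v4i32 itself is 128 bits and too wide; the integer scan finds i64
// (64 <= 96, and 128/64 is a power of two), so i64 is returned. A follow-up
// query with Width = 32 then yields i32 for the remaining tail.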
// Builds a vector from scalar loads.
// VecTy: Resulting vector type.
// LdOps: Load operators to build the vector from.
// [Start,End): The range of loads to use.
static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
SmallVectorImpl<SDValue> &LdOps,
unsigned Start, unsigned End) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(LdOps[Start]);
EVT LdTy = LdOps[Start].getValueType();
unsigned Width = VecTy.getSizeInBits();
unsigned NumElts = Width / LdTy.getSizeInBits();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
unsigned Idx = 1;
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
for (unsigned i = Start + 1; i != End; ++i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust the insert position based on the new load type.
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
VecOp = DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
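// A worked example (illustrative sketch) continuing the 96-bit case above:
// with LdOps = { i64, i32 } and VecTy = v4i32, the i64 load is placed via
// SCALAR_TO_VECTOR into a v2i64. On reaching the i32 load the accumulator is
// bitcast to v4i32 and the insert index is rescaled from 1 to 1 * 64 / 32 = 2,
// so the i32 lands in lane 2; the final bitcast to VecTy is then a no-op.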
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
// type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
Align, MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
if (LdWidth <= NewVTWidth) {
if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
assert(WidenWidth % NewVTWidth == 0);
unsigned NumConcat = WidenWidth / NewVTWidth;
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
}
// Load vector by using multiple loads from largest vector to scalar.
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
LdWidth -= NewVTWidth;
unsigned Offset = 0;
while (LdWidth > 0) {
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl, BasePtr.getValueType()));
SDValue L;
if (LdWidth < NewVTWidth) {
// The current type we are using is too large. Find a better size.
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
// Later code assumes the vector loads produced will be mergeable, so we
// must pad the final entry up to the previous width. Scalars are
// combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
while (size < LdOp->getValueSizeInBits(0)) {
Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
size += L->getValueSizeInBits(0);
}
L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
}
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
LdOps.push_back(L);
LdWidth -= NewVTWidth;
}
// Build the vector from the load operations.
unsigned End = LdOps.size();
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
// If the loads contain vectors, build the result using CONCAT_VECTORS.
// All of the vectors used to load are power-of-2 width, and the scalar loads
// can be combined to make a power-of-2 width vector.
SmallVector<SDValue, 16> ConcatOps(End);
int i = End - 1;
int Idx = End;
EVT LdTy = LdOps[i].getValueType();
// First, combine the scalar loads to a vector.
if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
LdTy = LdOps[i].getValueType();
if (LdTy.isVector())
break;
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
makeArrayRef(&ConcatOps[Idx], End - Idx));
Idx = End - 1;
LdTy = NewLdTy;
}
ConcatOps[--Idx] = LdOps[i];
}
if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
unsigned i = 0;
for (; i != End-Idx; ++i)
WidenOps[i] = ConcatOps[Idx+i];
for (; i != NumOps; ++i)
WidenOps[i] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
}
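// A worked example (illustrative sketch): loading a v3i32 that widens to
// v4i32. LdWidth starts at 96 bits; a first i64 load covers elements 0-1, a
// second i32 load covers element 2 (assuming alignment does not allow a
// wider tail load), and BuildVectorFromScalar reassembles the two scalars
// into a v4i32 whose lane 3 is undef.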
SDValue
DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD,
ISD::LoadExtType ExtType) {
// For extension loads, it may not be more efficient to chop up the vector
// and then extend it. Instead, we unroll the load and build a new vector.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
// Load each element and widen.
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] =
DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
LdEltVT, Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr,
DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
// Fill the rest with undefs.
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
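// A worked example (illustrative sketch): a sign-extending load with memory
// type v2i8 whose result widens to v4i32. The loop issues two i8 -> i32
// extending scalar loads at byte offsets 0 and 1, fills lanes 2 and 3 with
// undef, and builds the v4i32 result, rather than loading the vector whole
// and extending it afterwards.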
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
EVT ValVT = ValOp.getValueType();
unsigned ValWidth = ValVT.getSizeInBits();
EVT ValEltVT = ValVT.getVectorElementType();
unsigned ValEltWidth = ValEltVT.getSizeInBits();
assert(StVT.getVectorElementType() == ValEltVT);
int Idx = 0; // current index to store
unsigned Offset = 0; // offset from base to store
while (StWidth != 0) {
// Find the largest vector type we can store with.
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
unsigned NewVTWidth = NewVT.getSizeInBits();
unsigned Increment = NewVTWidth / 8;
if (NewVT.isVector()) {
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
SDValue EOp = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getConstant(Idx++, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth / ValEltWidth;
}
}
}
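// A worked example (illustrative sketch, assuming i64 and i32 are the legal
// choices): storing a v3i32 whose value operand was widened to v4i32.
// FindMemType first returns i64, so the value is bitcast to v2i64 and lane 0
// is stored, covering elements 0-1; 32 bits remain, FindMemType returns i32,
// and element 2 is stored at offset 8. Element 3 (widening garbage) is never
// written to memory.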
void
DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// For truncating stores, it may not be more efficient to truncate the vector
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
// It must be true that the wide vector type is bigger than the memory type
// we need to store.
assert(StVT.isVector() && ValOp.getValueType().isVector());
assert(StVT.bitsLT(ValOp.getValueType()));
// For truncating stores, we cannot play the trick of chopping into legal
// vector types and bitcasting to the right type. Instead, we unroll the
// store.
EVT StEltVT = StVT.getVectorElementType();
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT, Align,
MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr,
DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(
Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
}
}
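// A worked example (illustrative sketch): a truncating store with memory
// type v3i16 whose i32-element value operand was widened to v4i32. The
// chopping tricks above do not apply, so the three stored elements are
// extracted one by one and written with i32 -> i16 truncating scalar stores;
// the caller then chains the pieces together with a TokenFactor.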
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// FillWithZeroes specifies that the vector should be widened with zeroes.
SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
bool FillWithZeroes) {
// Note that InOp might have been widened so it might already have
// the right width or it might need to be narrowed.
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
SDLoc dl(InOp);
// Check if InOp already has the right width.
if (InVT == NVT)
return InOp;
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = NVT.getVectorNumElements();
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
unsigned NumConcat = WidenNumElts / InNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = FillVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
Ops[Idx] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
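// A worked example (illustrative sketch): ModifyToType(v2i32 mask, NVT =
// v4i32, FillWithZeroes = true). WidenNumElts = 4 is a multiple of
// InNumElts = 2, so the result is concat_vectors(Mask, zeroinitializer),
// and the two extra mask lanes read as "off" instead of undefined garbage.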
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (revision 322855)
@@ -1,7958 +1,7961 @@
//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAG class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <set>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
SDVTList Res = {VTs, NumVTs};
return Res;
}
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//
/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
return getValueAPF().bitwiseIsEqual(V);
}
bool ConstantFPSDNode::isValueValidForType(EVT VT,
const APFloat& Val) {
assert(VT.isFloatingPoint() && "Can only convert between FP types");
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
(void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
//===----------------------------------------------------------------------===//
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
+ bool AllowShrink) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
APInt SplatUndef;
unsigned SplatBitSize;
bool HasUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ MinSplatBits) &&
+ EltSize >= SplatBitSize;
}
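// A worked example (illustrative sketch) for the new AllowShrink flag: a
// v4i32 BUILD_VECTOR whose elements are all 0x01010101 is also an 8-bit
// splat of 1. With AllowShrink set, MinSplatBits is 0 and the caller may
// receive that narrower 8-bit splat; with it clear, the splat must be at
// least the 32-bit element size, so SplatVal comes back as 0x01010101.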
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
unsigned i = 0, e = N->getNumOperands();
// Skip over all of the undef values.
while (i != e && N->getOperand(i).isUndef())
++i;
// Do not accept an all-undef vector.
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
// elements. We have to be a bit careful here, as the type of the constant
// may not be the same as the type of the vector elements due to type
// legalization (the elements are promoted to a legal type for the target and
// a vector of a type may be legal when the base element type is not).
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all ones, not whether the individual
// constants are.
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
// undefs. Even with the above element type twiddling, this should be OK, as
// the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
return false;
return true;
}
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
bool IsAllUndef = true;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
IsAllUndef = false;
// Do not accept build_vectors that aren't all constants or which have non-0
// elements. We have to be a bit careful here, as the type of the constant
// may not be the same as the type of the vector elements due to type
// legalization (the elements are promoted to a legal type for the target
// and a vector of a type may be legal when the base element type is not).
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all zeros, not whether the individual
// constants are.
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
if (CN->getAPIntValue().countTrailingZeros() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
return false;
} else
return false;
}
// Do not accept an all-undef vector.
if (IsAllUndef)
return false;
return true;
}
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
if (!isa<ConstantSDNode>(Op))
return false;
}
return true;
}
bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
if (!isa<ConstantFPSDNode>(Op))
return false;
}
return true;
}
bool ISD::allOperandsUndef(const SDNode *N) {
// Return false if the node has no operands.
// This is "logically inconsistent" with the definition of "all" but
// is probably the desired behavior.
if (N->getNumOperands() == 0)
return false;
for (const SDValue &Op : N->op_values())
if (!Op.isUndef())
return false;
return true;
}
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
return ISD::ZERO_EXTEND;
default:
break;
}
llvm_unreachable("Invalid LoadExtType");
}
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
// To perform this operation, we just need to swap the L and G bits of the
// operation.
unsigned OldL = (Operation >> 2) & 1;
unsigned OldG = (Operation >> 1) & 1;
return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
(OldL << 1) | // New G bit
(OldG << 2)); // New L bit.
}
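// A worked example (illustrative sketch): SETLT has L = 1 and G = 0; swapping
// the two bits yields SETGT, matching the identity (a < b) == (b > a).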
ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
unsigned Operation = Op;
if (isInteger)
Operation ^= 7; // Flip L, G, E bits, but not U.
else
Operation ^= 15; // Flip all of the condition bits.
if (Operation > ISD::SETTRUE2)
Operation &= ~8; // Don't let N and U bits get set.
return ISD::CondCode(Operation);
}
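// A worked example (illustrative sketch): inverting the integer condition
// SETLT flips the L, G, and E bits and produces SETGE, i.e.
// !(a < b) == (a >= b). The U bit is additionally flipped only for
// floating-point conditions, where inversion must also toggle how unordered
// inputs compare.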
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if it is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
switch (Opcode) {
default: llvm_unreachable("Illegal integer setcc operation!");
case ISD::SETEQ:
case ISD::SETNE: return 0;
case ISD::SETLT:
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE: return 1;
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE: return 2;
}
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool IsInteger) {
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
unsigned Op = Op1 | Op2; // Combine all of the condition bits.
// If the N and U bits get set, then the resultant comparison DOES suddenly
// care about orderedness, and it is true when ordered.
if (Op > ISD::SETTRUE2)
Op &= ~16; // Clear the U bit if the N bit is set.
// Canonicalize illegal integer setcc's.
if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
Op = ISD::SETNE;
return ISD::CondCode(Op);
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool IsInteger) {
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
// Combine all of the condition bits.
ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
// Canonicalize illegal integer setcc's.
if (IsInteger) {
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
case ISD::SETOEQ: // SETEQ & SETU[LG]E
case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
}
}
return Result;
}
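// A worked example (illustrative sketch): OR-combining the integer conditions
// SETUGT (U|G) and SETULT (U|L) produces SETUNE (U|L|G), which the integer
// canonicalization rewrites to SETNE. AND-combining SETUGE (U|G|E) and
// SETULE (U|L|E) keeps only U|E, i.e. SETUEQ, canonicalized to SETEQ.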
//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//
/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
ID.AddInteger(OpC);
}
/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
/// solely with their pointer.
static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
ID.AddPointer(VTList.VTs);
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
for (auto& Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
for (auto& Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
AddNodeIDOperands(ID, OpList);
}
/// If this is an SDNode with special info, add this info to the NodeID data.
static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
switch (N->getOpcode()) {
case ISD::TargetExternalSymbol:
case ISD::ExternalSymbol:
case ISD::MCSymbol:
llvm_unreachable("Should only be used on nodes with operands");
default: break; // Normal nodes don't need extra info.
case ISD::TargetConstant:
case ISD::Constant: {
const ConstantSDNode *C = cast<ConstantSDNode>(N);
ID.AddPointer(C->getConstantIntValue());
ID.AddBoolean(C->isOpaque());
break;
}
case ISD::TargetConstantFP:
case ISD::ConstantFP:
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
break;
case ISD::TargetGlobalAddress:
case ISD::GlobalAddress:
case ISD::TargetGlobalTLSAddress:
case ISD::GlobalTLSAddress: {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
break;
}
case ISD::BasicBlock:
ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
break;
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
case ISD::RegisterMask:
ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
break;
case ISD::ConstantPool:
case ISD::TargetConstantPool: {
const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
ID.AddInteger(CP->getAlignment());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
else
ID.AddPointer(CP->getConstVal());
ID.AddInteger(CP->getTargetFlags());
break;
}
case ISD::TargetIndex: {
const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
ID.AddInteger(TI->getIndex());
ID.AddInteger(TI->getOffset());
ID.AddInteger(TI->getTargetFlags());
break;
}
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
ID.AddInteger(AT->getPointerInfo().getAddrSpace());
break;
}
case ISD::PREFETCH: {
const MemSDNode *PF = cast<MemSDNode>(N);
ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
i != e; ++i)
ID.AddInteger(SVN->getMaskElt(i));
break;
}
case ISD::TargetBlockAddress:
case ISD::BlockAddress: {
const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
ID.AddPointer(BA->getBlockAddress());
ID.AddInteger(BA->getOffset());
ID.AddInteger(BA->getTargetFlags());
break;
}
} // end switch (N->getOpcode())
// Target specific memory nodes could also have address spaces to check.
if (N->isTargetMemoryOpcode())
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
/// data.
static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
AddNodeIDOpcode(ID, N->getOpcode());
// Add the return value info.
AddNodeIDValueTypes(ID, N->getVTList());
// Add the operand info.
AddNodeIDOperands(ID, N->ops());
// Handle SDNode leaves with special info.
AddNodeIDCustom(ID, N);
}
//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
default: break;
case ISD::HANDLENODE:
case ISD::EH_LABEL:
return true; // Never CSE these nodes.
}
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
}
/// RemoveDeadNodes - This method deletes all unreachable nodes in the
/// SelectionDAG.
void SelectionDAG::RemoveDeadNodes() {
// Create a dummy node (which is not added to allnodes) that adds a reference
// to the root node, preventing it from being deleted.
HandleSDNode Dummy(getRoot());
SmallVector<SDNode*, 128> DeadNodes;
// Add all obviously-dead nodes to the DeadNodes worklist.
for (SDNode &Node : allnodes())
if (Node.use_empty())
DeadNodes.push_back(&Node);
RemoveDeadNodes(DeadNodes);
// If the root changed (e.g. it was a dead load), update the root.
setRoot(Dummy.getValue());
}
/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
// Process the worklist, deleting the nodes and adding their uses to the
// worklist.
while (!DeadNodes.empty()) {
SDNode *N = DeadNodes.pop_back_val();
// Skip to the next node if we've already managed to delete the node. This
// could happen if replacing a node causes a node previously added to the
// worklist to be deleted.
if (N->getOpcode() == ISD::DELETED_NODE)
continue;
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeDeleted(N, nullptr);
// Take the node out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
// Next, brutally remove the operand list. This is safe to do, as there are
// no cycles in the graph.
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
SDUse &Use = *I++;
SDNode *Operand = Use.getNode();
Use.set(SDValue());
// Now that we removed this operand, see if there are no uses of it left.
if (Operand->use_empty())
DeadNodes.push_back(Operand);
}
DeallocateNode(N);
}
}
void SelectionDAG::RemoveDeadNode(SDNode *N){
SmallVector<SDNode*, 16> DeadNodes(1, N);
// Create a dummy node that adds a reference to the root node, preventing
// it from being deleted. (This matters if the root is an operand of the
// dead node.)
HandleSDNode Dummy(getRoot());
RemoveDeadNodes(DeadNodes);
}
void SelectionDAG::DeleteNode(SDNode *N) {
// First take this out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
// Finally, remove uses due to operands of this node, remove from the
// AllNodes list, and delete the node.
DeleteNodeNotInCSEMaps(N);
}
void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
assert(N->getIterator() != AllNodes.begin() &&
"Cannot delete the entry node!");
assert(N->use_empty() && "Cannot delete a node that is not dead!");
// Drop all of the operands and decrement used node's use counts.
N->DropOperands();
DeallocateNode(N);
}
void SDDbgInfo::erase(const SDNode *Node) {
DbgValMapType::iterator I = DbgValMap.find(Node);
if (I == DbgValMap.end())
return;
for (auto &Val: I->second)
Val->setIsInvalidated();
DbgValMap.erase(I);
}
void SelectionDAG::DeallocateNode(SDNode *N) {
// If we have operands, deallocate them.
removeOperands(N);
NodeAllocator.Deallocate(AllNodes.remove(N));
// Set the opcode to DELETED_NODE to help catch bugs when node
// memory is reallocated.
// FIXME: There are places in SDag that have grown a dependency on the opcode
// value in the released node.
__asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
N->NodeType = ISD::DELETED_NODE;
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
}
#ifndef NDEBUG
/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
static void VerifySDNode(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
case ISD::BUILD_PAIR: {
EVT VT = N->getValueType(0);
assert(N->getNumValues() == 1 && "Too many results!");
assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
"Wrong return type!");
assert(N->getNumOperands() == 2 && "Wrong number of operands!");
assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
"Mismatched operand types!");
assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
"Wrong operand type!");
assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
"Wrong return type size");
break;
}
case ISD::BUILD_VECTOR: {
assert(N->getNumValues() == 1 && "Too many results!");
assert(N->getValueType(0).isVector() && "Wrong return type!");
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
EVT EltVT = N->getValueType(0).getVectorElementType();
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
assert(I->getValueType() == N->getOperand(0).getValueType() &&
"Operands must all have the same type");
}
break;
}
}
}
#endif // NDEBUG
/// \brief Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
/// corresponds to it. This is useful when we're about to delete or repurpose
/// the node. We don't want future requests for structurally identical nodes
/// to return N anymore.
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
"Cond code doesn't exist!");
Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
break;
case ISD::ExternalSymbol:
Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
break;
case ISD::TargetExternalSymbol: {
ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
Erased = TargetExternalSymbols.erase(
std::pair<std::string,unsigned char>(ESN->getSymbol(),
ESN->getTargetFlags()));
break;
}
case ISD::MCSymbol: {
auto *MCSN = cast<MCSymbolSDNode>(N);
Erased = MCSymbols.erase(MCSN->getMCSymbol());
break;
}
case ISD::VALUETYPE: {
EVT VT = cast<VTSDNode>(N)->getVT();
if (VT.isExtended()) {
Erased = ExtendedValueTypeNodes.erase(VT);
} else {
Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
}
break;
}
default:
// Remove it from the CSE Map.
assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
#ifndef NDEBUG
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
llvm_unreachable("Node is not in map!");
}
#endif
return Erased;
}
/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
// already exists.
if (!doNotCSE(N)) {
SDNode *Existing = CSEMap.GetOrInsertNode(N);
if (Existing != N) {
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
ReplaceAllUsesWith(N, Existing);
// N is now dead. Inform the listeners and delete it.
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeDeleted(N, Existing);
DeleteNodeNotInCSEMaps(N);
return;
}
}
// If the node doesn't already exist, we updated it. Inform listeners.
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeUpdated(N);
}
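// Hypothetical merge example for the routine above: with X = (add a, b) and
// Y = (add a, c), rewriting c to b makes Y structurally identical to X, so
// GetOrInsertNode finds X, Y's users are transferred to X via
// ReplaceAllUsesWith, and any user that thereby becomes identical to some
// other node is merged the same way one level up.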
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
SDValue Ops[] = { Op };
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
SDValue Op1, SDValue Op2,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
SDValue Ops[] = { Op1, Op2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
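// A minimal sketch of the CSE probe these overloads wrap (the opcode, types
// and operand names here are illustrative assumptions, not fixed API usage):
//
//   FoldingSetNodeID ID;
//   AddNodeIDNode(ID, ISD::ADD, getVTList(MVT::i32), {LHS, RHS});
//   void *IP = nullptr;
//   if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
//     return SDValue(E, 0);        // reuse the structurally identical node
//   // ...otherwise build a new node and CSEMap.InsertNode(N, IP).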
unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
return getDataLayout().getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE) {
MF = &NewMF;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
Context = &MF->getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
allnodes_clear();
OperandRecycler.clear(OperandAllocator);
delete DbgInfo;
}
void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
while (!AllNodes.empty())
DeallocateNode(&AllNodes.front());
#ifndef NDEBUG
NextPersistentId = 0;
#endif
}
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
if (N) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant:
case ISD::ConstantFP:
llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
"debug location. Use another overload.");
}
}
return N;
}
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
const SDLoc &DL, void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
if (N) {
switch (N->getOpcode()) {
case ISD::Constant:
case ISD::ConstantFP:
// Erase debug location from the node if the node is used at several
// different places. Do not propagate one location to all uses as it
// will cause a worse single stepping debugging experience.
if (N->getDebugLoc() != DL.getDebugLoc())
N->setDebugLoc(DebugLoc());
break;
default:
// When the node's point of use is located earlier in the instruction
// sequence than its prior point of use, update its debug info to the
// earlier location.
if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
N->setDebugLoc(DL.getDebugLoc());
break;
}
}
return N;
}
void SelectionDAG::clear() {
allnodes_clear();
OperandRecycler.clear(OperandAllocator);
OperandAllocator.Reset();
CSEMap.clear();
ExtendedValueTypeNodes.clear();
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
static_cast<SDNode*>(nullptr));
EntryNode.UseList = nullptr;
InsertNode(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
}
SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType())
? getNode(ISD::FP_EXTEND, DL, VT, Op)
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);
TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
if (Op.getValueType() == VT) return Op;
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
getConstant(Imm, DL, Op.getValueType()));
}
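// Worked example: zero-extending the low 8 bits of an i32 in-register uses
// APInt::getLowBitsSet(32, 8) == 0x000000FF, so the call above emits
// (and x, 0xFF) rather than a separate truncate/extend pair.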
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
}
SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
}
SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
}
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue TrueValue;
switch (TLI->getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
VT);
break;
}
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
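// Example of the boolean-content split above: with ZeroOrOneBooleanContent,
// logical NOT of a boolean v is (xor v, 1); with
// ZeroOrNegativeOneBooleanContent (typical for vector compare results, where
// "true" is all-ones), it is (xor v, -1).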
SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
}
SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
EVT VT, bool isT, bool isO) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
const ConstantInt *Elt = &Val;
// In some cases the vector type is legal but the element type is illegal and
// needs to be promoted, for example v8i8 on ARM. In this case, promote the
// inserted value (the type does not need to match the vector element type).
// Any extra bits introduced will be truncated away.
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
// example v2i64 on MIPS32. In this case, find the nearest legal type, split
// the value into n parts and use a vector type with n-times the elements.
// Then bitcast to the type requested.
// Legalizing constants too early makes the DAGCombiner's job harder so we
// only legalize if the DAG tells us we must produce legal types.
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
// Check the temporary vector is the correct size. If this fails then
// getTypeToTransformTo() probably returned a type whose size (in bits)
// isn't a power-of-2 factor of the requested type size.
assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
.zextOrTrunc(ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
}
// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
if (getDataLayout().isBigEndian())
std::reverse(EltParts.begin(), EltParts.end());
// The elements must be reversed when the element order is different
// to the endianness of the elements (because the BITCAST is itself a
// vector shuffle in this situation). However, we do not need any code to
// perform this reversal because getConstant() is producing a vector
// splat.
// This situation occurs in MIPS MSA.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
void *IP = nullptr;
SDNode *N = nullptr;
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
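// Hypothetical walk-through of the expand path above: getConstant(K, DL,
// MVT::v2i64) on a 32-bit target that expands i64 builds the four i32 pieces
// <lo(K), hi(K), lo(K), hi(K)> as a v4i32 BUILD_VECTOR and bitcasts the
// result back to v2i64; on a big-endian layout each lo/hi pair is reversed
// first.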
SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
bool isTarget) {
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
}
SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
EVT VT, bool isTarget) {
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
EVT EltVT = VT.getScalarType();
// Do the map lookup using the actual bit pattern for the floating point
// value, so that we don't have problems with 0.0 comparing equal to -0.0, and
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
bool isTarget) {
EVT EltVT = VT.getScalarType();
if (EltVT == MVT::f32)
return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
else if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
EltVT == MVT::f16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&Ignored);
return getConstantFP(APF, DL, VT, isTarget);
} else
llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
EVT VT, int64_t Offset, bool isTargetGA,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
// Truncate (with sign-extension) the offset value to the pointer size.
unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
unsigned Opc;
if (GV->isThreadLocal())
Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
else
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
auto *N = newSDNode<GlobalAddressSDNode>(
Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
Alignment = MF->getFunction()->optForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
unsigned char TargetFlags) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
ID.AddPointer(MBB);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<BasicBlockSDNode>(MBB);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getValueType(EVT VT) {
if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
ValueTypeNodes.size())
ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
SDNode *&N = VT.isExtended() ?
ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
if (N) return SDValue(N, 0);
N = newSDNode<VTSDNode>(VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
SDNode *&N = MCSymbols[Sym];
if (N)
return SDValue(N, 0);
N = newSDNode<MCSymbolSDNode>(Sym, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
unsigned char TargetFlags) {
SDNode *&N =
TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
TargetFlags)];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
if ((unsigned)Cond >= CondCodeNodes.size())
CondCodeNodes.resize(Cond+1);
if (!CondCodeNodes[Cond]) {
auto *N = newSDNode<CondCodeSDNode>(Cond);
CondCodeNodes[Cond] = N;
InsertNode(N);
}
return SDValue(CondCodeNodes[Cond], 0);
}
/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
/// point at N1 to point at N2 and indices that point at N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
std::swap(N1, N2);
ShuffleVectorSDNode::commuteMask(M);
}
SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
SDValue N2, ArrayRef<int> Mask) {
assert(VT.getVectorNumElements() == Mask.size() &&
"Must have the same number of vector elements as mask elements!");
assert(VT == N1.getValueType() && VT == N2.getValueType() &&
"Invalid VECTOR_SHUFFLE");
// Canonicalize shuffle undef, undef -> undef
if (N1.isUndef() && N2.isUndef())
return getUNDEF(VT);
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
N2 = getUNDEF(VT);
for (int i = 0; i != NElts; ++i)
if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
// If shuffling a splat, try to blend the splat instead. We do this here so
// that even when this arises during lowering we don't have to re-handle it.
auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
if (!Splat)
return;
for (int i = 0; i < NElts; ++i) {
if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
continue;
// If this input comes from undef, mark it as such.
if (UndefElements[MaskVec[i] - Offset]) {
MaskVec[i] = -1;
continue;
}
// If we can blend a non-undef lane, use that instead.
if (!UndefElements[i])
MaskVec[i] = i + Offset;
}
};
if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
BlendSplat(N1BV, 0);
if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
BlendSplat(N2BV, NElts);
// Canonicalize all indices into lhs -> shuffle lhs, undef
// Canonicalize all indices into rhs -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
bool N2Undef = N2.isUndef();
for (int i = 0; i != NElts; ++i) {
if (MaskVec[i] >= NElts) {
if (N2Undef)
MaskVec[i] = -1;
else
AllLHS = false;
} else if (MaskVec[i] >= 0) {
AllRHS = false;
}
}
if (AllLHS && AllRHS)
return getUNDEF(VT);
if (AllLHS && !N2Undef)
N2 = getUNDEF(VT);
if (AllRHS) {
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
// Reset our undef status after accounting for the mask.
N2Undef = N2.isUndef();
// Re-check whether both sides ended up undef.
if (N1.isUndef() && N2Undef)
return getUNDEF(VT);
// If this is an identity shuffle, return the input vector directly.
bool Identity = true, AllSame = true;
for (int i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
// Shuffling a constant splat doesn't change the result.
if (N2Undef) {
SDValue V = N1;
// Look through any bitcasts. We check that these don't change the number
// (and size) of elements and just change their types.
while (V.getOpcode() == ISD::BITCAST)
V = V->getOperand(0);
// A splat should always show up as a build vector node.
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
// If this is a splat of an undef, shuffling it is also undef.
if (Splat && Splat.isUndef())
return getUNDEF(VT);
bool SameNumElts =
V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
// If the shuffle itself creates a splat, build the vector directly.
if (AllSame && SameNumElts) {
EVT BuildVT = BV->getValueType(0);
const SDValue &Splatted = BV->getOperand(MaskVec[0]);
SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (BuildVT != VT)
NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
return NewBV;
}
}
}
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
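// Worked example of the canonicalizations above, with NElts == 4:
//   shuffle v, v,     <4,5,1,2>  ->  shuffle v, undef, <0,1,1,2>
//   shuffle undef, v, <4,5,6,7>  ->  shuffle v, undef, <0,1,2,3>
// and the second mask is the identity, so getVectorShuffle simply returns v.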
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
}
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
ID.AddInteger(RegNo);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
ID.AddPointer(RegMask);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
int64_t Offset,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
assert((!V || V->getType()->isPointerTy()) &&
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<SrcValueSDNode>(V);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
ID.AddPointer(MD);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<MDNodeSDNode>(MD);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
if (VT == V.getValueType())
return V;
return getNode(ISD::BITCAST, SDLoc(V), VT, V);
}
SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
SDValue Ops[] = {Ptr};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
ID.AddInteger(SrcAS);
ID.AddInteger(DestAS);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VT, SrcAS, DestAS);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
}
SDValue SelectionDAG::expandVAArg(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(Align - 1, dl, VAList.getValueType()));
VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
getConstant(-(int64_t)Align, dl, VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(getDataLayout().getTypeAllocSize(
VT.getTypeForEVT(*getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp1 =
getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
}
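// The over-alignment step above is the usual round-up idiom
//   aligned = (p + Align - 1) & -Align
// which works because -Align == ~(Align - 1) for a power-of-two Align;
// e.g. p = 0x1003, Align = 8 gives (0x1003 + 7) & ~7 = 0x1008.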
SDValue SelectionDAG::expandVACopy(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
// This defaults to loading a pointer from the input and storing it to the
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
SDValue Tmp1 =
getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS));
return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
unsigned Align =
std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
case ISD::SETFALSE2: return getConstant(0, dl, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
TargetLowering::BooleanContent Cnt =
TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
VT);
}
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETONE:
case ISD::SETO:
case ISD::SETUO:
case ISD::SETUEQ:
case ISD::SETUNE:
assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
break;
}
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
const APInt &C2 = N2C->getAPIntValue();
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
}
}
}
if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
switch (Cond) {
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpUnordered, dl, VT);
case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
}
} else {
// Ensure that the constant occurs on the RHS.
ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
MVT CompVT = N1.getValueType().getSimpleVT();
if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
return SDValue();
return getSetCC(dl, VT, N2, N1, SwappedCond);
}
}
// Could not fold it.
return SDValue();
}
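// Example folds performed above: setcc 3, 5, setlt folds to getConstant(1)
// and setcc 3, 5, setgt to getConstant(0) in the requested result type,
// while an FP constant on the LHS against a non-constant RHS is only
// re-canonicalized (operands and condition swapped) when the swapped
// condition code is legal for the comparison type.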
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
KnownBits Known;
computeKnownBits(Op, Known, Depth);
return Mask.isSubsetOf(Known.Zero);
}
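// Usage sketch: proving (x & 0xFF00) == 0 for an i32 x reduces to
//   DAG.MaskedValueIsZero(X, APInt::getBitsSet(32, 8, 16))
// i.e. bits 8-15 must all be in Known.Zero; SignBitIsZero above is this
// same query specialized to APInt::getSignMask.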
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
if (ShAmt.ult(V.getScalarValueSizeInBits()))
return &ShAmt;
}
return nullptr;
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
computeKnownBits(Op, Known, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the known
/// bits that are shared by the requested vector elements.
void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
Known = KnownBits(BitWidth); // Don't know anything.
if (Depth == 6)
return; // Limit search depth.
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
if (!DemandedElts)
return; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::Constant:
// We know all of the bits for a constant!
Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
Known.Zero = ~Known.One;
break;
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
assert(NumElts == Op.getValueType().getVectorNumElements() &&
"Unexpected vector size");
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
computeKnownBits(SrcOp, Known2, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, so we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
assert(SrcOp.getValueSizeInBits() > BitWidth &&
"Expected BUILD_VECTOR implicit truncation");
Known2 = Known2.trunc(BitWidth);
}
// Known bits are the values that are shared by every demanded element.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
break;
case ISD::VECTOR_SHUFFLE: {
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
Known.Zero.setAllBits(); Known.One.setAllBits();
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = SVN->getMaskElt(i);
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
Known.resetAll();
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
if ((unsigned)M < NumElts)
DemandedLHS.setBit((unsigned)M % NumElts);
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
break;
}
case ISD::CONCAT_VECTORS: {
// Split DemandedElts and test each of the demanded subvectors.
Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; i != NumSubVectors; ++i) {
APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
break;
}
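// Example: for (v8i32 concat_vectors A, B) with DemandedElts = 0b00010010,
// A is queried with 0b0010 (element 1) and B with 0b0001 (element 4, i.e.
// B's element 0), and the two KnownBits results are intersected.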
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand those subvector elements,
// otherwise demand them all.
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
} else {
computeKnownBits(Src, Known, Depth + 1);
}
break;
}
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
unsigned SubBitWidth = N0.getScalarValueSizeInBits();
// Ignore bitcasts from floating point.
if (!N0.getValueType().isInteger())
break;
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
computeKnownBits(N0, Known, DemandedElts, Depth + 1);
break;
}
// Support big-endian targets when it becomes useful.
bool IsLE = getDataLayout().isLittleEndian();
if (!IsLE)
break;
// Bitcast 'small element' vector to 'large element' scalar/vector.
if ((BitWidth % SubBitWidth) == 0) {
assert(N0.getValueType().isVector() && "Expected bitcast from vector");
// Collect known bits for the (larger) output by collecting the known
// bits from each set of sub elements and shift these into place.
// We need to separately call computeKnownBits for each set of
// sub elements as the knownbits for each is likely to be different.
unsigned SubScale = BitWidth / SubBitWidth;
APInt SubDemandedElts(NumElts * SubScale, 0);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
}
}
// Bitcast 'large element' scalar/vector to 'small element' vector.
if ((SubBitWidth % BitWidth) == 0) {
assert(Op.getValueType().isVector() && "Expected bitcast to vector");
// Collect known bits for the (smaller) output by collecting the known
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
APInt SubDemandedElts(NumElts / SubScale, 0);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % SubScale) * BitWidth;
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
}
break;
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
case ISD::OR:
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
case ISD::XOR: {
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
Known.Zero = KnownZeroOut;
break;
}
case ISD::MUL: {
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
unsigned TrailZ = Known.countMinTrailingZeros() +
Known2.countMinTrailingZeros();
unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
Known.resetAll();
Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
break;
}
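// Worked example for the trailing-zero rule: a multiple of 4 (two known
// trailing zeros) times a multiple of 2 (one known trailing zero) is a
// multiple of 8, e.g. 4 * 6 = 24 = 0b11000, so three low bits of the product
// are known zero; this is exactly what pointer-alignment reasoning needs.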
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned LeadZ = Known2.countMinLeadingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
if (RHSMaxLeadingZeros != BitWidth)
LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
Known.Zero.setHighBits(LeadZ);
break;
}
case ISD::SELECT:
computeKnownBits(Op.getOperand(2), Known, Depth+1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(1), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
computeKnownBits(Op.getOperand(3), Known, Depth+1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(2), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
// If we know the result of a setcc has the top bits zero, use this info.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero <<= *ShAmt;
Known.One <<= *ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt->getZExtValue());
}
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero.lshrInPlace(*ShAmt);
Known.One.lshrInPlace(*ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt->getZExtValue());
}
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero.lshrInPlace(*ShAmt);
Known.One.lshrInPlace(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
APInt SignMask = APInt::getSignMask(BitWidth);
SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
if (Known.Zero.intersects(SignMask)) {
Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
} else if (Known.One.intersects(SignMask)) {
Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
break;
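// Example for the SRA rule: an i8 value 0b1xxxxxxx with a known-set sign bit
// shifted right by 3 arithmetically becomes 0b1111xxxx, so setHighBits(3)
// records the replicated sign bits in Known.One; with a known-clear sign bit
// the same bits land in Known.Zero instead.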
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignMask = APInt::getSignMask(EBits);
APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
InSignMask = InSignMask.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignMask;
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.One &= InputDemandedBits;
Known.Zero &= InputDemandedBits;
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
Known.Zero |= NewBits;
Known.One &= ~NewBits;
} else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
Known.One |= NewBits;
Known.Zero &= ~NewBits;
} else { // Input sign bit unknown
Known.Zero &= ~NewBits;
Known.One &= ~NewBits;
}
break;
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTPOP: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
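// Example for CTPOP: if at most 5 bits of the input can be set
// (countMaxPopulation() == 5), the result fits in Log2_32(5) + 1 == 3 bits,
// so bits 3 and up of the result are known zero.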
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
// If this is a ZEXTLoad and we are looking at the loaded value.
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
break;
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known,
DemandedElts.zext(InVT.getVectorNumElements()),
Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBits);
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBits);
break;
}
// TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.zext(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
break;
case ISD::USUBO:
case ISD::SSUBO:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
if (CLHS->getAPIntValue().isNonNegative()) {
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth: C is non-negative, so C+1 is non-zero.
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
// from [0-C].
if ((Known2.Zero & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
Known.Zero.setHighBits(NLZ2);
}
}
}
// If low bits are known to be zero in both operands, then we know they are
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
}
case ISD::UADDO:
case ISD::SADDO:
case ISD::ADDCARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
// The low bits of the output are known zero up to the number of low zero
// bits common to both LHS and RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
// Output known-0 bits are also known if the top bits of each input are
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
// With ADDE and ADDCARRY, a carry bit may be added in, so we can only
// use this information if we know (at least) that the low two bits are
// clear. We then return to the caller that the low bit is unknown but
// that other bits are known zero.
if (KnownZeroLow >= 2)
Known.Zero.setBits(1, KnownZeroLow);
break;
}
Known.Zero.setLowBits(KnownZeroLow);
if (KnownZeroHigh > 1)
Known.Zero.setHighBits(KnownZeroHigh - 1);
break;
}
case ISD::SREM:
if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
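// For example, srem by 8 gives LowBits = 7: the low 3 bits of the result
// are copied straight from the first operand.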
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
Known.Zero = Known2.Zero & LowBits;
Known.One = Known2.One & LowBits;
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
Known.One |= ~LowBits;
assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
}
}
break;
case ISD::UREM: {
if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
Known.Zero = Known2.Zero | ~LowBits;
Known.One = Known2.One & LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
uint32_t Leaders =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
Known.resetAll();
Known.Zero.setHighBits(Leaders);
break;
}
case ISD::EXTRACT_ELEMENT: {
computeKnownBits(Op.getOperand(0), Known, Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
// Remove high part of known bit mask
Known = Known.trunc(BitWidth);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is any-extended. So we do not know
// anything about the extended bits.
if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
computeKnownBits(InVec, Known, Depth + 1);
}
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth);
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
// If we know the element index, split the demand between the
// source vector and the inserted element.
Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
computeKnownBits(InVal, Known2, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
// If we demand the source vector then add its common known bits, ensuring
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
computeKnownBits(InVec, Known, Depth + 1);
computeKnownBits(InVal, Known2, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.reverseBits();
Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.byteSwap();
Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
if (Known2.isNonNegative()) {
Known.Zero = Known2.Zero;
Known.One = Known2.One;
break;
}
// We only know that the absolute value's MSB will be zero iff there is
// a set bit that isn't the sign bit (otherwise it could be INT_MIN).
Known2.One.clearSignBit();
if (Known2.One.getBoolValue()) {
Known.Zero = APInt::getSignMask(BitWidth);
break;
}
break;
}
case ISD::UMIN: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
unsigned LeadZero = Known.countMinLeadingZeros();
LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
Known.Zero.setHighBits(LeadZero);
break;
}
case ISD::UMAX: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
unsigned LeadOne = Known.countMinLeadingOnes();
LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
Known.One.setHighBits(LeadOne);
break;
}
case ISD::SMIN:
case ISD::SMAX: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
Known.Zero.setLowBits(Log2_32(Align));
break;
}
break;
default:
if (Opcode < ISD::BUILTIN_OP_END)
break;
LLVM_FALLTHROUGH;
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
}
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
SDValue N1) const {
// X + 0 never overflows.
if (isNullConstant(N1))
return OFK_Never;
KnownBits N1Known;
computeKnownBits(N1, N1Known);
if (N1Known.Zero.getBoolValue()) {
KnownBits N0Known;
computeKnownBits(N0, N0Known);
bool overflow;
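// ~Known.Zero is an upper bound on each operand; if even the two upper
// bounds cannot wrap when added, the addition can never overflow.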
(void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
if (!overflow)
return OFK_Never;
}
// mulhi + (0 or 1) never overflows.
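// The test below checks that every possibly-set bit of N1 is bit 0, i.e.
// N1 is known to be 0 or 1. The high half of an unsigned full multiply is
// at most 2^BitWidth - 2, so adding 1 cannot wrap.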
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
(~N1Known.Zero & 0x01) == ~N1Known.Zero)
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
KnownBits N0Known;
computeKnownBits(N0, N0Known);
if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
return OFK_Never;
}
return OFK_Sometime;
}
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
// Is the constant a known power of 2?
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
}
// Similarly, a logical right-shift of a constant sign-bit will have exactly
// one bit set.
if (Val.getOpcode() == ISD::SRL) {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
}
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
return false;
}))
return true;
// More could be done here, though the above checks are enough
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
KnownBits Known;
computeKnownBits(Val, Known);
return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
if (Depth == 6)
return 1; // Limit search depth.
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp+1;
case ISD::AssertZext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp;
case ISD::Constant: {
const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
return Val.getNumSignBits();
}
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != VTBits) {
assert(SrcOp.getValueSizeInBits() > VTBits &&
"Expected BUILD_VECTOR implicit truncation");
unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
}
Tmp = std::min(Tmp, Tmp2);
}
return Tmp;
case ISD::VECTOR_SHUFFLE: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
for (unsigned i = 0; i != NumElts; ++i) {
int M = SVN->getMaskElt(i);
if (!DemandedElts[i])
continue;
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
if (M < 0)
return 1;
if ((unsigned)M < NumElts)
DemandedLHS.setBit((unsigned)M % NumElts);
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
if (!!DemandedRHS) {
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
// If we don't know anything, early out and fall back to computeKnownBits.
if (Tmp == 1)
break;
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
return std::max(Tmp, Tmp2);
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
}
break;
case ISD::AND:
case ISD::OR:
case ISD::XOR: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp != 1) {
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
// computeKnownBits, and pick whichever answer is better.
}
break;
case ISD::SELECT:
Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SELECT_CC:
Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (Tmp == 1)
return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents. Fall through.
// If setcc returns 0/-1, all bits are sign bits.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned RotAmt = C->getZExtValue() & (VTBits-1);
// Handle rotate right by N like a rotate left by VTBits-N.
if (Op.getOpcode() == ISD::ROTR)
RotAmt = (VTBits-RotAmt) & (VTBits-1);
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
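// For instance, rotl (sext i8 to i32), 1: the input has at least 25 sign
// bits, so at least 24 survive the rotate.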
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp > RotAmt+1) return Tmp-RotAmt;
}
break;
case ISD::ADD:
case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
KnownBits Known;
computeKnownBits(Op.getOperand(0), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
// out of the result.
if (Known.isNonNegative())
return Tmp;
}
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
KnownBits Known;
computeKnownBits(Op.getOperand(1), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
if (Known.isNonNegative())
return Tmp2;
// Otherwise, we treat this like a SUB.
}
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
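// For example, truncating i64 -> i32 when the source has 40 sign bits
// leaves 40 - (64 - 32) = 8 sign bits in the result.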
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (NumSrcSignBits > (NumSrcBits - VTBits))
return NumSrcSignBits - (NumSrcBits - VTBits);
break;
}
case ISD::EXTRACT_ELEMENT: {
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
const int BitWidth = Op.getValueSizeInBits();
const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
// Get the reverse index (counting from the big end): operand 1 indexes
// elements from the little end, but the sign bits sit at the big end.
const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
// If the sign portion extends into our element, the subtraction gives the
// correct result; otherwise it is negative or exceeds the bit width, so
// clamp the answer to [0, BitWidth].
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
unsigned NumElts = InVec.getValueType().getVectorNumElements();
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
// If we know the element index, split the demand between the
// source vector and the inserted element.
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then get its sign bits.
Tmp = std::numeric_limits<unsigned>::max();
if (DemandedElts[EltIdx]) {
// TODO - handle implicit truncation of inserted elements.
if (InVal.getScalarValueSizeInBits() != VTBits)
break;
Tmp = ComputeNumSignBits(InVal, Depth + 1);
}
// If we demand the source vector then get its sign bits, and determine
// the minimum.
APInt VectorElts = DemandedElts;
VectorElts.clearBit(EltIdx);
if (!!VectorElts) {
Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
Tmp = ComputeNumSignBits(InVec, Depth + 1);
Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is any-extended, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
if (BitWidth != EltBitWidth)
break;
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand that subvector elements,
// otherwise demand them all.
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
return ComputeNumSignBits(Src, Depth + 1);
}
case ISD::CONCAT_VECTORS:
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!DemandedSub)
continue;
Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
// Handle LOADX separately here. EXTLOAD case will fall through.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
default: break;
case ISD::SEXTLOAD: // e.g. an i16 sextload into i32 yields 17 sign bits.
Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp+1;
case ISD::ZEXTLOAD: // e.g. an i16 zextload into i32 yields 16 sign bits.
Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp;
}
}
}
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
KnownBits Known;
computeKnownBits(Op, Known, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
Mask = Known.Zero;
} else if (Known.isNegative()) { // sign bit is 1;
Mask = Known.One;
} else {
// Nothing known.
return FirstAnswer;
}
// Okay, we know that the sign bit in Mask is set. Use CLZ to determine
// the number of identical bits in the top of the input value.
Mask = ~Mask;
Mask <<= Mask.getBitWidth()-VTBits;
// Return # leading zeros. We use 'min' here in case Val was zero before
// shifting. We don't want to return '64' as for an i32 "0".
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return false;
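// An OR only behaves like an ADD of a base and an offset when the constant
// has no bits in common with the possibly-set bits of the other operand.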
if (Op.getOpcode() == ISD::OR &&
!MaskedValueIsZero(Op.getOperand(0),
cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
return false;
return true;
}
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath)
return true;
if (Op->getFlags().hasNoNaNs())
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->getValueAPF().isNaN();
// TODO: Recognize more cases here.
return false;
}
bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
// If the value is a constant, we can obviously see if it is a zero or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
// TODO: Recognize more cases here.
switch (Op.getOpcode()) {
default: break;
case ISD::OR:
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return !C->isNullValue();
break;
}
return false;
}
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
// Check for negative and positive zero.
if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
if (CA->isZero() && CB->isZero()) return true;
// Otherwise they may not be equal.
return false;
}
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
KnownBits AKnown, BKnown;
computeKnownBits(A, AKnown);
computeKnownBits(B, BKnown);
return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
}
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
assert(llvm::all_of(Ops,
[Ops](SDValue Op) {
return Ops[0].getValueType() == Op.getValueType();
}) &&
"Concatenation of vectors with inconsistent value types!");
assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
VT.getVectorNumElements() &&
"Incorrect element count in vector concatenation!");
if (Ops.size() == 1)
return Ops[0];
// Concat of UNDEFs is UNDEF.
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
for (SDValue Op : Ops) {
EVT OpVT = Op.getValueType();
if (Op.isUndef())
Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
else if (Op.getOpcode() == ISD::BUILD_VECTOR)
Elts.append(Op->op_begin(), Op->op_end());
else
return SDValue();
}
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
return DAG.getBuildVector(VT, DL, Elts);
}
/// Gets or creates the specified node.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), None);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
getVTList(VT));
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue Operand, const SDNodeFlags Flags) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
// opaque flag is preserved during folding to prevent future folding with
// other constants.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
case ISD::ABS:
return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::BITREVERSE:
return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTPOP:
return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::FP16_TO_FP: {
bool Ignored;
APFloat FPV(APFloat::IEEEhalf(),
(Val.getBitWidth() == 16) ? Val : Val.trunc(16));
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)FPV.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstantFP(FPV, DL, VT);
}
}
}
// Constant fold unary operations with a floating point constant operand.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
APFloat V = C->getValueAPF(); // make copy
switch (Opcode) {
case ISD::FNEG:
V.changeSign();
return getConstantFP(V, DL, VT);
case ISD::FABS:
V.clearSign();
return getConstantFP(V, DL, VT);
case ISD::FCEIL: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FTRUNC: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FFLOOR: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, DL, VT);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool ignored;
APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
// FIXME need to be more flexible about rounding mode.
APFloat::opStatus s =
V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
return getConstant(IntVal, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
case ISD::FP_TO_FP16: {
bool Ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(APFloat::IEEEhalf(),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstant(V.bitcastToAPInt(), DL, VT);
}
}
}
// Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
if (BV->isConstant()) {
switch (Opcode) {
default:
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
case ISD::FNEG:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = { Operand };
if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
}
}
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
if (Operand.getValueType() == VT) return Operand; // noop conversion.
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid fpext node, dst < src!");
if (Operand.isUndef())
return getUNDEF(VT);
break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
break;
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getOperand(0);
if (OpOp.getValueType() == VT)
return OpOp;
}
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
if (Operand.getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
return Operand.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::ABS:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BSWAP!");
assert((VT.getScalarSizeInBits() % 16 == 0) &&
"BSWAP types must be a multiple of 16 bits!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITREVERSE:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BITREVERSE!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::SCALAR_TO_VECTOR:
assert(VT.isVector() && !Operand.getValueType().isVector() &&
(VT.getVectorElementType() == Operand.getValueType() ||
(VT.getVectorElementType().isInteger() &&
Operand.getValueType().isInteger() &&
VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
"Illegal SCALAR_TO_VECTOR node!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(Operand.getOperand(1)) &&
Operand.getConstantOperandVal(1) == 0 &&
Operand.getOperand(0).getValueType() == VT)
return Operand.getOperand(0);
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
// FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
Operand.getOperand(0), Operand.getNode()->getFlags());
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
case ISD::FABS:
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
}
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {Operand};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
const APInt &C2) {
switch (Opcode) {
case ISD::ADD: return std::make_pair(C1 + C2, true);
case ISD::SUB: return std::make_pair(C1 - C2, true);
case ISD::MUL: return std::make_pair(C1 * C2, true);
case ISD::AND: return std::make_pair(C1 & C2, true);
case ISD::OR: return std::make_pair(C1 | C2, true);
case ISD::XOR: return std::make_pair(C1 ^ C2, true);
case ISD::SHL: return std::make_pair(C1 << C2, true);
case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.udiv(C2), true);
case ISD::UREM:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.urem(C2), true);
case ISD::SDIV:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.sdiv(C2), true);
case ISD::SREM:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.srem(C2), true);
}
return std::make_pair(APInt(1, 0), false);
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, const ConstantSDNode *Cst1,
const ConstantSDNode *Cst2) {
if (Cst1->isOpaque() || Cst2->isOpaque())
return SDValue();
std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
Cst2->getAPIntValue());
if (!Folded.second)
return SDValue();
return getConstant(Folded.first, DL, VT);
}
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const GlobalAddressSDNode *GA,
const SDNode *N2) {
if (GA->getOpcode() != ISD::GlobalAddress)
return SDValue();
if (!TLI->isOffsetFoldingLegal(GA))
return SDValue();
const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
if (!Cst2)
return SDValue();
int64_t Offset = Cst2->getSExtValue();
switch (Opcode) {
case ISD::ADD: break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
GA->getOffset() + uint64_t(Offset));
}
bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
switch (Opcode) {
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
// If a divisor is zero/undef or any element of a divisor vector is
// zero/undef, the whole op is undef.
assert(Ops.size() == 2 && "Div/rem should have 2 operands");
SDValue Divisor = Ops[1];
if (Divisor.isUndef() || isNullConstant(Divisor))
return true;
return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
llvm::any_of(Divisor->op_values(),
[](SDValue V) { return V.isUndef() ||
isNullConstant(V); });
// TODO: Handle signed overflow.
}
// TODO: Handle oversized shifts.
default:
return false;
}
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
return getUNDEF(VT);
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
}
}
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
return FoldSymbolOffset(Opcode, VT, GA, Cst2);
if (TLI->isCommutativeBinOp(Opcode))
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
// For vectors extract each constant element into Inputs so we can constant
// fold them individually.
BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
if (!BV1 || !BV2)
return SDValue();
assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
SDValue V1 = BV1->getOperand(I);
SDValue V2 = BV2->getOperand(I);
// Avoid BUILD_VECTOR nodes that perform implicit truncation.
// FIXME: This is valid and could be handled by truncation.
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
Outputs.push_back(ScalarResult);
}
assert(VT.getVectorNumElements() == Outputs.size() &&
"Vector size mismatch!");
// We may have a vector type but a scalar result. Create a splat.
Outputs.resize(VT.getVectorNumElements(), Outputs.back());
// Build a big vector out of the scalar elements we generated.
return getBuildVector(VT, SDLoc(), Outputs);
}
SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorNumElements() == NumElts;
};
auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
(BV && BV->isConstant());
};
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be an i1 boolean
// that is then sign-extended back to the legal result type.
EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
EVT LegalSVT = VT.getScalarType();
if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
}
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
for (unsigned i = 0; i != NumElts; i++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
if (!InBV) {
// We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
ScalarOps.push_back(Op);
continue;
}
SDValue ScalarOp = InBV->getOperand(i);
EVT ScalarVT = ScalarOp.getValueType();
// Build vector (integer) scalar operands may need implicit
// truncation - do this before constant folding.
if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
ScalarOps.push_back(ScalarOp);
}
// Constant fold the scalar operands.
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
ScalarResults.push_back(ScalarResult);
}
return getBuildVector(VT, DL, ScalarResults);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
// Canonicalize constant to RHS if commutative.
if (TLI->isCommutativeBinOp(Opcode)) {
if (N1C && !N2C) {
std::swap(N1C, N2C);
std::swap(N1, N2);
} else if (N1CFP && !N2CFP) {
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
}
}
switch (Opcode) {
default: break;
case ISD::TokenFactor:
assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
N2.getValueType() == MVT::Other && "Invalid token factor!");
// Fold trivial token factors.
if (N1.getOpcode() == ISD::EntryToken) return N2;
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::AND:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
if (N2C && N2C->isNullValue())
return N2;
if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
return N1;
break;
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
if (N2C && N2C->isNullValue())
return N1;
break;
case ISD::UDIV:
case ISD::UREM:
case ISD::MULHU:
case ISD::MULHS:
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// x+0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FMUL) {
// x*0 --> 0
if (N2CFP && N2CFP->isZero())
return N2;
// x*1 --> x
if (N2CFP && N2CFP->isExactlyValue(1.0))
return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&
N1.getValueType().isFloatingPoint() &&
N2.getValueType().isFloatingPoint() &&
"Invalid FCOPYSIGN!");
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
assert((!VT.isVector() || VT == N2.getValueType()) &&
"Vector shift amounts must be in the same as their first arg");
// Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmountTy().
assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
if (N2C && N2C->isNullValue())
return N1;
break;
case ISD::FP_ROUND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg round!");
assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
"Cannot FP_ROUND_INREG integer types");
assert(EVT.isVector() == VT.isVector() &&
"FP_ROUND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in FP_ROUND_INREG");
assert(EVT.bitsLE(VT) && "Not rounding down!");
(void)EVT;
if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
break;
}
case ISD::FP_ROUND:
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
"Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
case ISD::AssertZext: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
assert(!EVT.isVector() &&
"AssertSExt/AssertZExt type should be the vector element type "
"rather than the vector type!");
assert(EVT.bitsLE(VT) && "Not extending!");
if (VT == EVT) return N1; // noop assertion.
break;
}
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
assert(EVT.isVector() == VT.isVector() &&
"SIGN_EXTEND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in SIGN_EXTEND_INREG");
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val.ashrInPlace(Val.getBitWidth() - FromBits);
return getConstant(Val, DL, ConstantVT);
};
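// Worked example (illustrative): sign-extending the low 4 bits of the i8
// value 0x0A: the shift left by 4 gives 0xA0, and the arithmetic shift
// right by 4 gives 0xFA, i.e. -6, matching sign-extension of 0b1010.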
if (N1C) {
const APInt &Val = N1C->getAPIntValue();
return SignExtendInReg(Val, VT);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
llvm::EVT OpVT = N1.getOperand(0).getValueType();
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
if (Op.isUndef()) {
Ops.push_back(getUNDEF(OpVT));
continue;
}
ConstantSDNode *C = cast<ConstantSDNode>(Op);
APInt Val = C->getAPIntValue();
Ops.push_back(SignExtendInReg(Val, OpVT));
}
return getBuildVector(VT, DL, Ops);
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering
// expands copies of large vectors from registers.
if (N2C &&
N1.getOpcode() == ISD::CONCAT_VECTORS &&
N1.getNumOperands() > 0) {
unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),
getConstant(N2C->getZExtValue() % Factor, DL,
N2.getValueType()));
}
// EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering
// expands large vector constants.
if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt = N1.getOperand(N2C->getZExtValue());
if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated, and the result implicitly
// extended. Make that explicit here.
Elt = getAnyExtOrTrunc(Elt, DL, VT);
return Elt;
}
// EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
// operations are lowered to scalars.
if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
// If the indices are the same, return the inserted element else
// if the indices are known different, extract the element from
// the original vector.
SDValue N1Op2 = N1.getOperand(2);
ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
if (N1Op2C && N2C) {
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
if (VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
else
return getSExtOrTrunc(N1.getOperand(1), DL, VT);
}
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}
}
break;
case ISD::EXTRACT_ELEMENT:
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
assert(!N1.getValueType().isVector() && !VT.isVector() &&
(N1.getValueType().isInteger() == VT.isInteger()) &&
N1.getValueType() != VT &&
"Wrong types for EXTRACT_ELEMENT!");
// EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalization expands
// 64-bit integers into 32-bit parts. Instead of building the extract of the
// BUILD_PAIR, only to have legalization rip it apart, just do it now.
if (N1.getOpcode() == ISD::BUILD_PAIR)
return N1.getOperand(N2C->getZExtValue());
// EXTRACT_ELEMENT of a constant int is also very common.
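// For example (illustrative), extracting element 1 of the i64 constant
// 0x1122334455667788 as an i32 yields 0x11223344; element 0 is the low half.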
if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
case ISD::EXTRACT_SUBVECTOR:
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
assert(VT.getVectorElementType() ==
N1.getValueType().getVectorElementType() &&
"Extract subvector VTs must have the same element type!");
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
if (N2C) {
assert((VT.getVectorNumElements() + N2C->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
// Trivial extraction.
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
// EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
// the concat have the same type as the extract.
if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
N1.getNumOperands() > 0 &&
VT == N1.getOperand(0).getValueType()) {
unsigned Factor = VT.getVectorNumElements();
return N1.getOperand(N2C->getZExtValue() / Factor);
}
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
}
break;
}
// Perform trivial constant folding.
if (SDValue SV =
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
if (N1CFP) {
if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
case ISD::FADD:
s = V1.add(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s != APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FMUL:
s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FDIV:
s = V1.divide(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
}
break;
case ISD::FREM :
s = V1.mod(V2);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
}
break;
case ISD::FCOPYSIGN:
V1.copySign(V2);
return getConstantFP(V1, DL, VT);
default: break;
}
}
if (Opcode == ISD::FP_ROUND) {
APFloat V = N1CFP->getValueAPF(); // make copy
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, DL, VT);
}
}
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
std::swap(N1, N2);
} else {
switch (Opcode) {
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
case ISD::FSUB:
case ISD::FDIV:
case ISD::FREM:
case ISD::SRA:
return N1; // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
// For vectors, we can't easily build an all-zero vector; just return
// the other operand.
return N2;
}
}
}
// Fold a bunch of operators when the RHS is undef.
if (N2.isUndef()) {
switch (Opcode) {
case ISD::XOR:
if (N1.isUndef())
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUB:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
return N2; // fold op(arg1, undef) -> undef
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
case ISD::AND:
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
// For vectors, we can't easily build an all-zero vector; just return
// the LHS.
return N1;
case ISD::OR:
if (!VT.isVector())
return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
// For vectors, we can't easily build an all-ones vector; just return
// the LHS.
return N1;
case ISD::SRA:
return N1;
}
}
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2};
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
if (N1CFP && N2CFP && N3CFP) {
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
}
break;
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return V;
break;
}
case ISD::SELECT:
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
if (N1C->getZExtValue())
return N2; // select true, X, Y -> X
return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
break;
}
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
&& N2.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
N2.getValueType().isVector() &&
"Insert subvector VTs must be a vectors");
assert(VT == N1.getValueType() &&
"Dest and insert subvector source types must match!");
assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
"Insert subvector must be from smaller vector to larger vector!");
if (isa<ConstantSDNode>(Index)) {
assert((N2.getValueType().getVectorNumElements() +
cast<ConstantSDNode>(Index)->getZExtValue()
<= VT.getVectorNumElements())
&& "Insert subvector overflow!");
}
// Trivial insertion.
if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
}
break;
}
case ISD::BITCAST:
// Fold bit_convert nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
break;
}
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2, N3};
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VT, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VT, Ops);
}
/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
/// the incoming stack arguments to be loaded from the stack.
SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
SmallVector<SDValue, 8> ArgChains;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
// Build a tokenfactor for all the chains.
return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
const SDLoc &dl) {
assert(!Value.isUndef());
unsigned NumBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, dl, VT);
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
VT);
}
assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
EVT IntVT = VT.getScalarType();
if (!IntVT.isInteger())
IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
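// For example (illustrative), with NumBits == 32 the magic constant is
// 0x01010101, so a fill byte of 0x42 becomes 0x42 * 0x01010101 == 0x42424242.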
APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
DAG.getConstant(Magic, dl, IntVT));
}
if (VT != Value.getValueType() && !VT.isInteger())
Value = DAG.getBitcast(VT.getScalarType(), Value);
if (VT != Value.getValueType())
Value = DAG.getSplatBuildVector(VT, dl, Value);
return Value;
}
/// getMemsetStringVal - Similar to getMemsetValue, except this is only
/// used when a memcpy is turned into a memset because the source is a
/// constant string pointer.
static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
const TargetLowering &TLI,
const ConstantDataArraySlice &Slice) {
// Handle vector with all elements zero.
if (Slice.Array == nullptr) {
if (VT.isInteger())
return DAG.getConstant(0, dl, VT);
else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
return DAG.getConstantFP(0.0, dl, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, dl,
EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
llvm_unreachable("Expected type!");
}
assert(!VT.isVector() && "Can't handle vector type here!");
unsigned NumVTBits = VT.getSizeInBits();
unsigned NumVTBytes = NumVTBits / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
APInt Val(NumVTBits, 0);
if (DAG.getDataLayout().isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
} else {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
}
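// Illustrative example: packing the four bytes "abcd" into an i32 yields
// 0x64636261 on little-endian targets and 0x61626364 on big-endian ones.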
// If the "cost" of materializing the integer immediate is less than the cost
// of a load, then it is cost effective to turn the load into the immediate.
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, dl, VT);
return SDValue(nullptr, 0);
}
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
const SDLoc &DL) {
EVT VT = Base.getValueType();
return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
}
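// Usage sketch (illustrative): getMemBasePlusOffset(Ptr, 8, DL) simply
// builds (add Ptr, 8) in the pointer's value type; the memcpy / memmove /
// memset expansions below use it to step through memory one chunk at a time.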
/// Returns true if memcpy source is constant data.
static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
uint64_t SrcDelta = 0;
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
else if (Src.getOpcode() == ISD::ADD &&
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
Src.getOperand(1).getOpcode() == ISD::Constant) {
G = cast<GlobalAddressSDNode>(Src.getOperand(0));
SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
}
if (!G)
return false;
return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
SrcDelta + G->getOffset());
}
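// Illustrative example: a source of the form (add GlobalAddress:@str, 5)
// is recognized here with SrcDelta == 5, letting the memcpy expansion read
// the constant data of @str directly instead of emitting loads.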
/// Determines the optimal series of memory ops to replace the memset / memcpy.
/// Returns true if the number of memory ops is below the threshold (Limit),
/// and returns the sequence of memory op types to use for the
/// memset / memcpy in MemOps, by reference.
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
unsigned DstAS, unsigned SrcAS,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
"Expecting memcpy / memset source to meet alignment requirement!");
// If 'SrcAlign' is zero, that means the memory operation does not need to
// load the value, i.e. memset or memcpy from constant string. Otherwise,
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
// is the specified alignment of the memory operation. If it is zero, that
// means it's possible to change the alignment of the destination.
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsMemset, ZeroMemset, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here, as SrcAlign is always greater than
// or equal to DstAlign (or zero).
VT = MVT::i64;
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
!TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
unsigned NumMemOps = 0;
while (Size != 0) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads / stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
TLI.isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
TLI.isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing an unaligned and overlapping load / store (or a pair of them).
// FIXME: Only do this for 64 bits or more, since we don't have a proper
// cost model for unaligned loads / stores.
bool Fast;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
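// Illustrative example (assuming i64 is legal and the overlap path above is
// not taken): a 15-byte copy yields MemOps == { i64, i32, i16, i8 } -- the
// greedy loop repeatedly peels off the largest type that still fits.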
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
return MF.getFunction()->optForMinSize();
return MF.getFunction()->optForSize();
}
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than a potentially humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroConstant ? 0 : SrcAlign),
false, false, CopyFromConstant, true,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
if (VTSize > Size) {
// Issuing an unaligned load / store pair that overlaps with the previous
// pair. Adjust the offset accordingly.
assert(i == NumMemOps-1 && i != 0);
SrcOff -= VTSize - Size;
DstOff -= VTSize - Size;
}
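// E.g. (illustrative, assuming fast misaligned accesses) a 15-byte copy
// lowered as two i64 ops shifts the second op back to offset 7, so bytes
// 7..14 are covered by an overlapping load / store pair.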
if (CopyFromConstant &&
(isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
// instruction. It would require a load from a constant pool first.
// We only handle zero vectors here.
// FIXME: Handle other cases where a store of a vector immediate can be
// done in a single instruction.
ConstantDataArraySlice SubSlice;
if (SrcOff < Slice.Length) {
SubSlice = Slice;
SubSlice.move(SrcOff);
} else {
// This is an out-of-bounds access and hence UB. Pretend we read zero.
SubSlice.Array = nullptr;
SubSlice.Offset = 0;
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode())
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
}
if (!Store.getNode()) {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME: does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
}
OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align), SrcAlign,
false, false, false, false,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
uint64_t SrcOff = 0, DstOff = 0;
SmallVector<SDValue, 8> LoadValues;
SmallVector<SDValue, 8> LoadChains;
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
OutChains.clear();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
Store = DAG.getStore(Chain, dl, LoadValues[i],
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
/// \brief Lower a call to the 'memset' intrinsic into a series of store
/// operations.
///
/// \param DAG Selection DAG where lowered code is placed.
/// \param dl Link to corresponding IR location.
/// \param Chain Control flow dependency.
/// \param Dst Pointer to destination memory location.
/// \param Src Value of byte to write into the memory.
/// \param Size Number of bytes to write.
/// \param Align Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
/// SDValue otherwise.
///
/// The function tries to replace the 'llvm.memset' intrinsic with several
/// store operations and value calculation code. This is usually profitable
/// for small memory sizes.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memset to a series of load/store ops if the size operand
// falls below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
true, IsZeroVal, false, true,
DstPtrInfo.getAddrSpace(), ~0u,
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
// Find the largest store and generate the bit pattern for it.
EVT LargestVT = MemOps[0];
for (unsigned i = 1; i < NumMemOps; i++)
if (MemOps[i].bitsGT(LargestVT))
LargestVT = MemOps[i];
SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
if (VTSize > Size) {
// Issuing an unaligned load / store pair that overlaps with the previous
// pair. Adjust the offset accordingly.
assert(i == NumMemOps-1 && i != 0);
DstOff -= VTSize - Size;
}
// If this store is smaller than the largest store see whether we can get
// the smaller value for free with a truncate.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0
if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
report_fatal_error("cannot lower memory intrinsic in address space " +
Twine(AS));
}
}
SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemcpy(
*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
if (AlwaysInline) {
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
true, DstPtrInfo, SrcPtrInfo);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
// beyond the given memory regions. But fixing this isn't easy, and most
// people don't care.
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemmove(
*this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
*this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
Args.push_back(Entry);
Entry.Node = Src;
Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment, AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
}
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
const Value *PtrVal, unsigned Alignment,
AtomicOrdering Ordering,
SyncScope::ID SSID) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
// An atomic store does not load. An atomic load does not store.
// (An atomicrmw obviously both loads and stores.)
// For now, atomics are considered to be volatile always, and they are
// chained as such.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
auto Flags = MachineMemOperand::MOVolatile;
if (Opcode != ISD::ATOMIC_STORE)
Flags |= MachineMemOperand::MOLoad;
if (Opcode != ISD::ATOMIC_LOAD)
Flags |= MachineMemOperand::MOStore;
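// E.g. an ATOMIC_LOAD gets MOVolatile | MOLoad, an ATOMIC_STORE gets
// MOVolatile | MOStore, and an atomicrmw-style op gets all three flags.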
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment, AAMDNodes(),
nullptr, SSID, Ordering);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
Opcode == ISD::ATOMIC_LOAD_OR ||
Opcode == ISD::ATOMIC_LOAD_XOR ||
Opcode == ISD::ATOMIC_LOAD_NAND ||
Opcode == ISD::ATOMIC_LOAD_MIN ||
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
EVT VT = Val.getValueType();
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Val};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
EVT VT, SDValue Chain, SDValue Ptr,
MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
if (Ops.size() == 1)
return Ops[0];
SmallVector<EVT, 4> VTs;
VTs.reserve(Ops.size());
for (unsigned i = 0; i < Ops.size(); ++i)
VTs.push_back(Ops[i].getValueType());
return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
}
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
bool ReadMem, bool WriteMem, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
auto Flags = MachineMemOperand::MONone;
if (WriteMem)
Flags |= MachineMemOperand::MOStore;
if (ReadMem)
Flags |= MachineMemOperand::MOLoad;
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
if (!Size)
Size = MemVT.getStoreSize();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
FI->getIndex(), Offset);
// If this is (FI+Offset1)+Offset2, we can model it.
if (Ptr.getOpcode() != ISD::ADD ||
!isa<ConstantSDNode>(Ptr.getOperand(1)) ||
!isa<FrameIndexSDNode>(Ptr.getOperand(0)))
return MachinePointerInfo();
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
return MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI,
Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}
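// Illustrative example: for a pointer of the form (add FrameIndex<1>, 8),
// the overload above returns fixed-stack pointer info for FI#1 at offset 8.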
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.isUndef())
return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
if (VT == MemVT) {
ExtType = ISD::NON_EXTLOAD;
} else if (ExtType == ISD::NON_EXTLOAD) {
assert(VT == MemVT && "Non-extending load from different memory type!");
} else {
// Extending load.
assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
"Should only be an extending load, not truncating!");
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
"Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
"Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
SDVTList VTs = Indexed ?
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Offset };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
ExtType, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
VT, MMO);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
MemVT, Alignment, MMOFlags, AAInfo);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
MemVT, MMO);
}
SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
SDValue Base, SDValue Offset,
ISD::MemIndexedMode AM) {
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().isUndef() && "Load is already a indexed load!");
// Don't propagate the invariant or dereferenceable flags.
auto MMOFlags =
LD->getMemOperand()->getFlags() &
~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
LD->getAAInfo());
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ISD::UNINDEXED, false, VT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
EVT SVT, unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, EVT SVT,
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
"Should only be a truncating store, not extending!");
assert(VT.isInteger() == SVT.isInteger() &&
"Can't do FP-INT conversion!");
assert(VT.isVector() == SVT.isVector() &&
"Cannot use trunc store to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
"Cannot use trunc store to change the number of vector elements!");
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ISD::UNINDEXED, true, SVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
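// NOTE (illustrative sketch, not part of the upstream source): a truncating
// store narrows the value on its way to memory, e.g. for an i32 Val32:
//   SDValue St = DAG.getTruncStore(Chain, DL, Val32, Ptr,
//                                  MachinePointerInfo(), MVT::i8);
// stores only the low 8 bits; the asserts above guarantee the memory type is
// strictly narrower than the value type.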
SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
SDValue Base, SDValue Offset,
ISD::MemIndexedMode AM) {
StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
ST->isTruncatingStore(), ST->getMemoryVT(),
ST->getMemOperand());
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,
bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
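// NOTE (illustrative sketch, not part of the upstream source): the operand
// order built above is { Chain, Ptr, Mask, Val }, and a caller would write
// something like:
//   SDValue MSt = DAG.getMaskedStore(Chain, DL, Val, Ptr, Mask,
//                                    Val.getValueType(), MMO,
//                                    /*IsTruncating=*/false,
//                                    /*IsCompressing=*/false);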
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
assert(Ops.size() == 5 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, VT, MMO);
createOperands(N, Ops);
assert(N->getValue().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
assert(Ops.size() == 5 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, VT, MMO);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDUse> Ops) {
switch (Ops.size()) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
// Copy from an SDUse array into an SDValue array for use with
// the regular getNode logic.
SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
return getNode(Opcode, DL, VT, NewOps);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
switch (Opcode) {
default: break;
case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
case ISD::SELECT_CC:
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
"LHS and RHS of condition must have same type!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"True and False arms of SelectCC must have same type!");
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
break;
case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
}
// Memoize nodes.
SDNode *N;
SDVTList VTs = getVTList(VT);
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
return getNode(Opcode, DL, getVTList(ResultTys), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
#if 0
switch (Opcode) {
// FIXME: figure out how to safely handle things like
// int foo(int x) { return 1 << (x & 255); }
// int bar() { return foo(256); }
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
case ISD::SHL_PARTS:
if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
else if (N3.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
// If the and is only masking out bits that cannot affect the shift,
// eliminate the and.
unsigned NumBits = VT.getScalarSizeInBits()*2;
if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
break;
}
#endif
// Memoize the node unless it returns a flag.
SDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList) {
return getNode(Opcode, DL, VTList, None);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1) {
SDValue Ops[] = { N1 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2) {
SDValue Ops[] = { N1, N2 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3) {
SDValue Ops[] = { N1, N2, N3 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VTList, Ops);
}
SDVTList SelectionDAG::getVTList(EVT VT) {
return makeVTList(SDNode::getValueTypeList(VT), 1);
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
FoldingSetNodeID ID;
ID.AddInteger(2U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(2);
Array[0] = VT1;
Array[1] = VT2;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
FoldingSetNodeID ID;
ID.AddInteger(3U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
ID.AddInteger(VT3.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(3);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
FoldingSetNodeID ID;
ID.AddInteger(4U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
ID.AddInteger(VT3.getRawBits());
ID.AddInteger(VT4.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(4);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
Array[3] = VT4;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
unsigned NumVTs = VTs.size();
FoldingSetNodeID ID;
ID.AddInteger(NumVTs);
for (unsigned index = 0; index < NumVTs; index++) {
ID.AddInteger(VTs[index].getRawBits());
}
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
std::copy(VTs.begin(), VTs.end(), Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
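// NOTE (illustrative sketch, not part of the upstream source): every
// getVTList overload interns its array in VTListMap, so equal requests
// return pointer-identical storage:
//   SDVTList A = DAG.getVTList(MVT::i32, MVT::Other);
//   SDVTList B = DAG.getVTList(MVT::i32, MVT::Other);
//   assert(A.VTs == B.VTs && "VT lists are uniqued");
// This is what allows node CSE to compare value-type lists by pointer.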
/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node; instead it returns the node that
/// already exists. If the resultant node does not exist in the DAG, the
/// input node is returned. As a degenerate case, if you specify the same
/// input operands as the node already has, the input node is returned.
SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op == N->getOperand(0)) return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
N->OperandList[0].set(Op);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
return N; // No operands changed, just return the input node.
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
if (N->OperandList[0] != Op1)
N->OperandList[0].set(Op1);
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
SDValue Ops[] = { Op1, Op2, Op3 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4) {
SDValue Ops[] = { Op1, Op2, Op3, Op4 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4, SDValue Op5) {
SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
unsigned NumOps = Ops.size();
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
// If no operands changed just return the input node.
if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
for (unsigned i = 0; i != NumOps; ++i)
if (N->OperandList[i] != Ops[i])
N->OperandList[i].set(Ops[i]);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
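// NOTE (illustrative sketch, not part of the upstream source): because the
// node is mutated in place, callers must continue with the returned pointer,
// which differs from the input when an equivalent node already exists:
//   SDNode *Res = DAG.UpdateNodeOperands(N, NewOp0, NewOp1);
//   if (Res != N) {
//     // N was CSE'd to an existing node; use Res from here on.
//   }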
/// DropOperands - Release the operands and set this node to have
/// zero operands.
void SDNode::DropOperands() {
// Unlike the code in MorphNodeTo that does this, we don't need to
// watch for dead nodes here.
for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
SDUse &Use = *I++;
Use.set(SDValue());
}
}
/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
/// machine opcode.
///
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, EVT VT3,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
New->setNodeId(-1);
if (New != N) {
ReplaceAllUsesWith(N, New);
RemoveDeadNode(N);
}
return New;
}
/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
/// the line number information on the merged node, since it is not possible
/// to preserve the information that the operation is associated with multiple
/// lines. This makes the debugger work better at -O0, where there is a higher
/// probability of other instructions being associated with that line.
///
/// For IROrder, we keep the smaller of the two.
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
N->setIROrder(Order);
return N;
}
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
/// Note that MorphNodeTo returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one. Note that the SDLoc need not be the same.
///
/// Using MorphNodeTo is faster than creating a new node and swapping it in
/// with ReplaceAllUsesWith both because it often avoids allocating a new
/// node, and because it doesn't require CSE recalculation for any of
/// the node's users.
///
/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
/// As a consequence it isn't appropriate to use from within the DAG combiner or
/// the legalizer which maintain worklists that would need to be updated when
/// deleting things.
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
// If an identical node already exists, use it.
void *IP = nullptr;
if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
}
if (!RemoveNodeFromCSEMaps(N))
IP = nullptr;
// Start the morphing.
N->NodeType = Opc;
N->ValueList = VTs.VTs;
N->NumValues = VTs.NumVTs;
// Clear the operands list, updating used nodes to remove this from their
// use list. Keep track of any operands that become dead as a result.
SmallPtrSet<SDNode*, 16> DeadNodeSet;
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
SDUse &Use = *I++;
SDNode *Used = Use.getNode();
Use.set(SDValue());
if (Used->use_empty())
DeadNodeSet.insert(Used);
}
// For MachineNode, initialize the memory references information.
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
MN->setMemRefs(nullptr, nullptr);
// Swap for an appropriately sized array from the recycler.
removeOperands(N);
createOperands(N, Ops);
// Delete any nodes that are still dead after adding the uses for the
// new operands.
if (!DeadNodeSet.empty()) {
SmallVector<SDNode *, 16> DeadNodes;
for (SDNode *N : DeadNodeSet)
if (N->use_empty())
DeadNodes.push_back(N);
RemoveDeadNodes(DeadNodes);
}
if (IP)
CSEMap.InsertNode(N, IP); // Memoize the new node.
return N;
}
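// NOTE (illustrative sketch, not part of the upstream source): instruction
// selectors normally reach MorphNodeTo through the SelectNodeTo wrappers
// above, e.g.
//   SDNode *New = CurDAG->SelectNodeTo(N, MachineOpc, MVT::i32, Op0, Op1);
// which morphs N into a machine node in place and CSEs it when possible.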
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
case ISD::STRICT_FNEARBYINT:
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
}
// We're taking this node out of the chain, so we need to re-link things.
SDValue InputChain = Node->getOperand(0);
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
// updates the node in place to have the requested operands.
if (Res == Node) {
// If we updated the node in place, reset the node ID. To the isel,
// this should be just like a newly allocated machine node.
Res->setNodeId(-1);
} else {
ReplaceAllUsesWith(Node, Res);
RemoveDeadNode(Node);
}
return Res;
}
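// NOTE (illustrative sketch, not part of the upstream source): conceptually
// this rewrites, for example,
//   t1: f64,ch = strict_fadd Chain, X, Y
// into
//   t1: f64 = fadd X, Y
// after splicing the node out of the chain by redirecting every user of the
// old chain result to the incoming chain.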
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
/// Note that getMachineNode returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(ResultTys);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTs,
ArrayRef<SDValue> Ops) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
void *IP = nullptr;
if (DoCSE) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops);
IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
}
}
// Allocate a new MachineSDNode.
N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
if (DoCSE)
CSEMap.InsertNode(N, IP);
InsertNode(N);
return N;
}
/// getTargetExtractSubreg - A convenience function for creating
/// TargetOpcode::EXTRACT_SUBREG nodes.
SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
SDValue Operand) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
VT, Operand, SRIdxVal);
return SDValue(Subreg, 0);
}
/// getTargetInsertSubreg - A convenience function for creating
/// TargetOpcode::INSERT_SUBREG nodes.
SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
SDValue Operand, SDValue Subreg) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
VT, Operand, Subreg, SRIdxVal);
return SDValue(Result, 0);
}
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
E->intersectFlagsWith(Flags);
return E;
}
}
return nullptr;
}
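// NOTE (illustrative sketch, not part of the upstream source): DAG combines
// can use this to probe for an existing node without creating one as a side
// effect:
//   SDValue Ops[] = { N0, N1 };
//   if (SDNode *Existing =
//           DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(VT), Ops)) {
//     // Reuse Existing instead of building a new SUB node.
//   }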
/// getDbgValue - Creates an SDDbgValue node.
///
/// SDNode
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
/// Constant
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
}
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
/// pointed to by a use iterator is deleted, increment the use iterator
/// so that it doesn't dangle.
///
class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
SDNode::use_iterator &UI;
SDNode::use_iterator &UE;
void NodeDeleted(SDNode *N, SDNode *E) override {
// Increment the iterator as needed.
while (UI != UE && N == *UI)
++UI;
}
public:
RAUWUpdateListener(SelectionDAG &d,
SDNode::use_iterator &ui,
SDNode::use_iterator &ue)
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
} // end anonymous namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes From has a single result value.
///
void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDNode *From = FromN.getNode();
assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
"Cannot replace with this method!");
assert(From != To.getNode() && "Cannot replace uses of with self");
// Preserve Debug Values
TransferDbgValues(FromN, To);
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
// This specifically avoids visiting uses of From that arise while the
// replacement is happening, because any such uses would be the result
// of CSE: If an existing node looks like From after one of its operands
// is replaced by To, we don't want to replace all of its users with To
// as well. See PR3018 for more info.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
}
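// NOTE (illustrative sketch, not part of the upstream source): a typical
// caller replaces one value and relies on the machinery above for cleanup:
//   DAG.ReplaceAllUsesWith(SDValue(N, 0), NewVal);
// Afterwards N may be dead, and any users that became identical to existing
// nodes have been merged by AddModifiedNodeToCSEMaps.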
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes that for each value of From, there is a
/// corresponding value in To in the same position with the same type.
///
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
#ifndef NDEBUG
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
assert((!From->hasAnyUseOfValue(i) ||
From->getValueType(i) == To->getValueType(i)) &&
"Cannot use this version of ReplaceAllUsesWith!");
#endif
// Handle the trivial case.
if (From == To)
return;
// Preserve Debug Info. Only do this if there's a use.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
if (From->hasAnyUseOfValue(i)) {
assert((i < To->getNumValues()) && "Invalid To location");
TransferDbgValues(SDValue(From, i), SDValue(To, i));
}
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot().getNode())
setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version can replace From with any result values. To must match the
/// number and types of values returned by From.
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
TransferDbgValues(SDValue(From, i), *To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot().getNode())
setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone.
void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To) {
// Handle the really simple, really trivial case efficiently.
if (From == To) return;
// Handle the simple, trivial case efficiently.
if (From.getNode()->getNumValues() == 1) {
ReplaceAllUsesWith(From, To);
return;
}
// Preserve Debug Info.
TransferDbgValues(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
UE = From.getNode()->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
bool UserRemovedFromCSEMaps = false;
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
// Skip uses of different values from the same node.
if (Use.getResNo() != From.getResNo()) {
++UI;
continue;
}
// If this node hasn't been modified yet, it's still in the CSE maps,
// so remove its old self from the CSE maps.
if (!UserRemovedFromCSEMaps) {
RemoveNodeFromCSEMaps(User);
UserRemovedFromCSEMaps = true;
}
++UI;
Use.set(To);
} while (UI != UE && *UI == User);
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
continue;
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot())
setRoot(To);
}
namespace {
/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
struct UseMemo {
SDNode *User;
unsigned Index;
SDUse *Use;
};
/// operator< - Sort Memos by User.
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
} // end anonymous namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list.
void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
const SDValue *To,
unsigned Num) {
// Handle the simple, trivial case efficiently.
if (Num == 1)
return ReplaceAllUsesOfValueWith(*From, *To);
TransferDbgValues(*From, *To);
// Read all the uses up front and make a record of them. This helps
// with processing new uses that are introduced during the
// replacement process.
SmallVector<UseMemo, 4> Uses;
for (unsigned i = 0; i != Num; ++i) {
unsigned FromResNo = From[i].getResNo();
SDNode *FromNode = From[i].getNode();
for (SDNode::use_iterator UI = FromNode->use_begin(),
E = FromNode->use_end(); UI != E; ++UI) {
SDUse &Use = UI.getUse();
if (Use.getResNo() == FromResNo) {
UseMemo Memo = { *UI, i, &Use };
Uses.push_back(Memo);
}
}
}
// Sort the uses, so that all the uses from a given User are together.
std::sort(Uses.begin(), Uses.end());
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
// We know that this user uses some value of From. If it is the right
// value, update it.
SDNode *User = Uses[UseIndex].User;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// The Uses array is sorted, so all the uses for a given User
// are next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
unsigned i = Uses[UseIndex].Index;
SDUse &Use = *Uses[UseIndex].Use;
++UseIndex;
Use.set(To[i]);
} while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
}
/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
/// based on their topological order. It returns the number of nodes, which
/// is one greater than the largest id assigned.
unsigned SelectionDAG::AssignTopologicalOrder() {
unsigned DAGSize = 0;
// SortedPos tracks the progress of the algorithm. Nodes before it are
// sorted, nodes after it are unsorted. When the algorithm completes
// it is at the end of the list.
allnodes_iterator SortedPos = allnodes_begin();
// Visit all the nodes. Move nodes with no operands to the front of
// the list immediately. Annotate nodes that do have operands with their
// operand count. Before we do this, the Node Id fields of the nodes
// may contain arbitrary values. After, the Node Id fields for nodes
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes at SortedPos and after will contain the
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
SDNode *N = &*I++;
checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no operands; add it to the result array immediately.
N->setNodeId(DAGSize++);
allnodes_iterator Q(N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
N->setNodeId(Degree);
}
}
// Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
for (SDNode &Node : allnodes()) {
SDNode *N = &Node;
checkForCycles(N, this);
// N is in sorted position, so all its uses have one fewer operand
// that needs to be sorted.
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *P = *UI;
unsigned Degree = P->getNodeId();
assert(Degree != 0 && "Invalid node degree");
--Degree;
if (Degree == 0) {
// All of P's operands are sorted, so P may be sorted now.
P->setNodeId(DAGSize++);
if (P->getIterator() != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Update P's outstanding operand count.
P->setNodeId(Degree);
}
}
if (Node.getIterator() == SortedPos) {
#ifndef NDEBUG
allnodes_iterator I(N);
SDNode *S = &*++I;
dbgs() << "Overran sorted position:\n";
S->dumprFull(this); dbgs() << "\n";
dbgs() << "Checking if this is due to cycles\n";
checkForCycles(this, true);
#endif
llvm_unreachable(nullptr);
}
}
assert(SortedPos == AllNodes.end() &&
"Topological sort incomplete!");
assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
"First node in topological sort is not the entry token!");
assert(AllNodes.front().getNodeId() == 0 &&
"First node in topological sort has non-zero id!");
assert(AllNodes.front().getNumOperands() == 0 &&
"First node in topological sort has operands!");
assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
"Last node in topologic sort has unexpected id!");
assert(AllNodes.back().use_empty() &&
"Last node in topologic sort has users!");
assert(DAGSize == allnodes_size() && "Node count mismatch!");
return DAGSize;
}
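// NOTE (illustrative sketch, not part of the upstream source): the routine
// above is Kahn's topological sort, with the NodeId field doubling as the
// outstanding-operand counter. For the tiny DAG
//   EntryToken -> A -> B
// EntryToken has no operands and is numbered 0 immediately; visiting it
// drops A's count to 0 so A receives id 1, and B then receives id 2.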
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
if (SD) {
assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
SD->setHasDebugValue(true);
}
DbgInfo->add(DB, SD, isParameter);
}
/// TransferDbgValues - Transfer SDDbgValues. Called when replacing nodes.
void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
if (From == To || !From.getNode()->getHasDebugValue())
return;
SDNode *FromNode = From.getNode();
SDNode *ToNode = To.getNode();
ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
SmallVector<SDDbgValue *, 2> ClonedDVs;
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
// Only add DbgValues attached to the same ResNo.
if (Dbg->getKind() == SDDbgValue::SDNODE &&
Dbg->getSDNode() == From.getNode() &&
Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
assert(FromNode != ToNode &&
"Should not transfer Debug Values intranode");
SDDbgValue *Clone =
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
Dbg->setIsInvalidated();
}
}
for (SDDbgValue *I : ClonedDVs)
AddDbgValue(I, ToNode, false);
}
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
if (!OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
return TokenFactor;
}
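// NOTE (illustrative sketch, not part of the upstream source): if users
// previously ordered themselves after (OldLoad:1), the code above reroutes
// them to
//   TokenFactor(OldLoad:1, NewMemOp:1)
// so every consumer now orders after both the old load and the new memory
// operation, preserving memory dependencies.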
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isNullValue();
}
bool llvm::isNullFPConstant(SDValue V) {
ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
return Const != nullptr && Const->isZero() && !Const->isNegative();
}
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isAllOnesValue();
}
bool llvm::isOneConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isOne();
}
bool llvm::isBitwiseNot(SDValue V) {
return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
}
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
// FIXME: We blindly ignore splats which include undef which is overly
// pessimistic.
if (CN && UndefElements.none() &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
return nullptr;
}
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
if (CN && UndefElements.none())
return CN;
}
return nullptr;
}
HandleSDNode::~HandleSDNode() {
DropOperands();
}
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
const DebugLoc &DL,
const GlobalValue *GA, EVT VT,
int64_t o, unsigned char TF)
: SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
EVT VT, unsigned SrcAS,
unsigned DestAS)
: SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
MemSDNodeBits.IsVolatile = MMO->isVolatile();
MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
MemSDNodeBits.IsInvariant = MMO->isInvariant();
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
/// Profile - Gather unique data for the node.
///
void SDNode::Profile(FoldingSetNodeID &ID) const {
AddNodeIDNode(ID, this);
}
namespace {
struct EVTArray {
std::vector<EVT> VTs;
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
} // end anonymous namespace
static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
static ManagedStatic<sys::SmartMutex<true>> VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
const EVT *SDNode::getValueTypeList(EVT VT) {
if (VT.isExtended()) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
}
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
/// indicated value. This method ignores uses of other values defined by this
/// operation.
bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
assert(Value < getNumValues() && "Bad value!");
// TODO: Only iterate over uses of a given value of the node
for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
if (UI.getUse().getResNo() == Value) {
if (NUses == 0)
return false;
--NUses;
}
}
// Found exactly the right number of uses?
return NUses == 0;
}
/// hasAnyUseOfValue - Return true if there are any uses of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
assert(Value < getNumValues() && "Bad value!");
for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
if (UI.getUse().getResNo() == Value)
return true;
return false;
}
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
if (User == this)
Seen = true;
else
return false;
}
return Seen;
}
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
if (llvm::any_of(Nodes,
[&User](const SDNode *Node) { return User == Node; }))
Seen = true;
else
return false;
}
return Seen;
}
/// isOperandOf - Return true if this value is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
return true;
return false;
}
bool SDNode::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (this == Op.getNode())
return true;
return false;
}
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
/// side-effecting instructions on any chain path. In practice, this looks
/// through token factors and non-volatile loads. In order to remain efficient,
/// this only looks a couple of nodes in, it does not do an exhaustive search.
///
/// Note that we only need to examine chains when we're searching for
/// side-effects; SelectionDAG requires that all side-effects are represented
/// by chains, even if another operand would force a specific ordering. This
/// constraint is necessary to allow transformations like splitting loads.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
// Don't search too deeply; we just want to be able to see through
// TokenFactors etc.
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel.
if (getOpcode() == ISD::TokenFactor) {
// First, try a shallow search.
if (is_contained((*this)->ops(), Dest)) {
// We found the chain we want as an operand of this TokenFactor.
// Essentially, we reach the chain without side-effects if we could
// serialize the TokenFactor into a simple chain of operations with
// Dest as the last operation. This is automatically true if the
// chain has one use: there are no other ordering constraints.
// If the chain has more than one use, we give up: some other
// use of Dest might force a side-effect between Dest and the current
// node.
if (Dest.hasOneUse())
return true;
}
// Next, try a deep search: check whether every operand of the TokenFactor
// reaches Dest.
return llvm::all_of((*this)->ops(), [=](SDValue Op) {
return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
});
}
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
if (!Ld->isVolatile())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
}
bool SDNode::hasPredecessor(const SDNode *N) const {
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Worklist.push_back(this);
return hasPredecessorHelper(N, Visited, Worklist);
}
void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
this->Flags.intersectWith(Flags);
}
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
EVT VT = N->getValueType(0);
unsigned NE = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
SmallVector<SDValue, 8> Scalars;
SmallVector<SDValue, 4> Operands(N->getNumOperands());
// If ResNE is 0, fully unroll the vector op.
if (ResNE == 0)
ResNE = NE;
else if (NE > ResNE)
NE = ResNE;
unsigned i;
for (i = 0; i != NE; ++i) {
for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
SDValue Operand = N->getOperand(j);
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
EVT OperandEltVT = OperandVT.getVectorElementType();
Operands[j] =
getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
}
}
switch (N->getOpcode()) {
default: {
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
N->getFlags()));
break;
}
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
getShiftAmountOperand(Operands[0].getValueType(),
Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
Operands[0],
getValueType(ExtVT)));
}
}
}
for (; i < ResNE; ++i)
Scalars.push_back(getUNDEF(EltVT));
EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
return getBuildVector(VecVT, dl, Scalars);
}
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
LoadSDNode *Base,
unsigned Bytes,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())
return false;
EVT VT = LD->getValueType(0);
if (VT.getSizeInBits() / 8 != Bytes)
return false;
SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1);
auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
auto LocDecomp = BaseIndexOffset::match(Loc, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
return (Dist * Bytes == Offset);
return false;
}
/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
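/// For example, a GlobalAddress known (from its trailing zero bits) to be
/// 16-byte aligned, plus a constant offset of 8, yields
/// MinAlign(16, 8) == 8.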
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
// stack slot's alignment.
int FrameIdx = 1 << 31;
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
} else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
// Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
return FIInfoAlign;
}
return 0;
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
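/// For example, v8i32 splits into (v4i32, v4i32); for scalars, both halves
/// are the type returned by getTypeToTransformTo.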
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
// Currently all types are split in half.
EVT LoVT, HiVT;
if (!VT.isVector())
LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
else
LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
return std::make_pair(LoVT, HiVT);
}
/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
/// low/high part.
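/// For example, splitting a v8i32 with LoVT == HiVT == v4i32 emits
/// EXTRACT_SUBVECTOR nodes at element indices 0 and 4.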
std::pair<SDValue, SDValue>
SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
const EVT &HiVT) {
assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
N.getValueType().getVectorNumElements() &&
"More vector elements requested than available!");
SDValue Lo, Hi;
Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
getConstant(LoVT.getVectorNumElements(), DL,
TLI->getVectorIdxTy(getDataLayout())));
return std::make_pair(Lo, Hi);
}
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
unsigned Start, unsigned Count) {
EVT VT = Op.getValueType();
if (Count == 0)
Count = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Op, getConstant(i, SL, IdxTy)));
}
}
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
}
Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
}
bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits,
bool IsBigEndian) const {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned VecWidth = VT.getSizeInBits();
if (MinSplatBits > VecWidth)
return false;
// FIXME: The widths are based on this node's type, but build vectors can
// truncate their operands.
SplatValue = APInt(VecWidth, 0);
SplatUndef = APInt(VecWidth, 0);
// Get the bits. Bits with undefined values (when the corresponding element
// of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
// in SplatValue. If any of the values are not constant, give up and return
// false.
unsigned NumOps = getNumOperands();
assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
unsigned EltWidth = VT.getScalarSizeInBits();
for (unsigned j = 0; j < NumOps; ++j) {
unsigned i = IsBigEndian ? NumOps - 1 - j : j;
SDValue OpVal = getOperand(i);
unsigned BitPos = j * EltWidth;
if (OpVal.isUndef())
SplatUndef.setBits(BitPos, BitPos + EltWidth);
else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
else
return false;
}
// The build_vector is all constants or undefs. Find the smallest element
// size that splats the vector.
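// For example, a v2i16 build_vector of <0x0101, 0x0101> starts with a
// 32-bit SplatValue of 0x01010101; both halving steps succeed, so (given
// MinSplatBits <= 8) SplatBitSize ends up as 8, a splat of the byte 0x01.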
HasAnyUndefs = (SplatUndef != 0);
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
APInt LowValue = SplatValue.trunc(HalfSize);
APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
MinSplatBits > HalfSize)
break;
SplatValue = HighValue | LowValue;
SplatUndef = HighUndef & LowUndef;
VecWidth = HalfSize;
}
SplatBitSize = VecWidth;
return true;
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
if (UndefElements) {
UndefElements->clear();
UndefElements->resize(getNumOperands());
}
SDValue Splatted;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
SDValue Op = getOperand(i);
if (Op.isUndef()) {
if (UndefElements)
(*UndefElements)[i] = true;
} else if (!Splatted) {
Splatted = Op;
} else if (Splatted != Op) {
return SDValue();
}
}
if (!Splatted) {
assert(getOperand(0).isUndef() &&
"Can only have a splat without a constant for all undefs.");
return getOperand(0);
}
return Splatted;
}
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}
ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
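// Return the base-2 log of a power-of-2 constant FP splat; e.g. a splat of
// 8.0 yields 3, while 6.0 or 0.5 (not an exact power-of-2 integer) yields -1.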
int32_t
BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const {
if (ConstantFPSDNode *CN =
dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
bool IsExact;
APSInt IntVal(BitWidth);
const APFloat &APF = CN->getValueAPF();
if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
APFloat::opOK ||
!IsExact)
return -1;
return IntVal.exactLogBase2();
}
return -1;
}
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
return false;
}
return true;
}
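// A splat mask repeats one element: every in-range index must equal the
// first non-undef index, e.g. {2, 2, -1, 2} is a splat of element 2 but
// {0, 1, 0, 1} is not.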
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Find the first non-undef value in the shuffle mask.
unsigned i, e;
for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
/* search */;
assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
// Make sure all remaining elements are either undef or the same as the first
// non-undef value.
for (int Idx = Mask[i]; i != e; ++i)
if (Mask[i] >= 0 && Mask[i] != Idx)
return false;
return true;
}
// \brief Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
return N.getNode();
// Treat a GlobalAddress supporting constant offset folding as a
// constant integer.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
return nullptr;
}
SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
return N.getNode();
return nullptr;
}
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
SmallPtrSetImpl<const SDNode*> &Checked,
const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N).second) {
errs() << "Detected cycle in SelectionDAG\n";
dbgs() << "Offending node:\n";
N->dumprFull(DAG); dbgs() << "\n";
abort();
}
for (const SDValue &Op : N->op_values())
checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
Checked.insert(N);
Visited.erase(N);
}
#endif
void llvm::checkForCycles(const llvm::SDNode *N,
const llvm::SelectionDAG *DAG,
bool force) {
#ifndef NDEBUG
bool check = force;
#ifdef EXPENSIVE_CHECKS
check = true;
#endif // EXPENSIVE_CHECKS
if (check) {
assert(N && "Checking nonexistent SDNode");
SmallPtrSet<const SDNode*, 32> visited;
SmallPtrSet<const SDNode*, 32> checked;
checkForCyclesHelper(N, visited, checked, DAG);
}
#endif // !NDEBUG
}
void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
checkForCycles(DAG->getRoot().getNode(), DAG, force);
}
Index: head/contrib/llvm/lib/IR/AutoUpgrade.cpp
===================================================================
--- head/contrib/llvm/lib/IR/AutoUpgrade.cpp (revision 322854)
+++ head/contrib/llvm/lib/IR/AutoUpgrade.cpp (revision 322855)
@@ -1,2332 +1,2350 @@
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
return false;
// Yes, it's old, replace it with new version.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
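// For example, an old declaration of llvm.x86.sse41.insertps taking an i32
// immediate is renamed and mapped onto the current declaration, which takes
// the immediate as an i8.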
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check that the last argument is an i32.
Type *LastArgType = F->getFunctionType()->getParamType(
F->getFunctionType()->getNumParams() - 1);
if (!LastArgType->isIntegerTy(32))
return false;
// Move this function aside and map down.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with the LLVM version
// that started autoupgrading them. At some point in the future we would
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
Name == "sse.add.ss" || // Added in 4.0
Name == "sse2.add.sd" || // Added in 4.0
Name == "sse.sub.ss" || // Added in 4.0
Name == "sse2.sub.sd" || // Added in 4.0
Name == "sse.mul.ss" || // Added in 4.0
Name == "sse2.mul.sd" || // Added in 4.0
Name == "sse.div.ss" || // Added in 4.0
Name == "sse2.div.sd" || // Added in 4.0
Name == "sse41.pmaxsb" || // Added in 3.9
Name == "sse2.pmaxs.w" || // Added in 3.9
Name == "sse41.pmaxsd" || // Added in 3.9
Name == "sse2.pmaxu.b" || // Added in 3.9
Name == "sse41.pmaxuw" || // Added in 3.9
Name == "sse41.pmaxud" || // Added in 3.9
Name == "sse41.pminsb" || // Added in 3.9
Name == "sse2.pmins.w" || // Added in 3.9
Name == "sse41.pminsd" || // Added in 3.9
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Name.startswith("avx512.mask.pmin") || // Added in 4.0
Name.startswith("avx2.vbroadcast") || // Added in 3.8
Name.startswith("avx2.pbroadcast") || // Added in 3.8
Name.startswith("avx.vpermil.") || // Added in 3.1
Name.startswith("sse2.pshuf") || // Added in 3.9
Name.startswith("avx512.pbroadcast") || // Added in 3.9
Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
Name.startswith("avx512.mask.movddup") || // Added in 3.9
Name.startswith("avx512.mask.movshdup") || // Added in 3.9
Name.startswith("avx512.mask.movsldup") || // Added in 3.9
Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
Name.startswith("avx512.mask.punpckl") || // Added in 3.9
Name.startswith("avx512.mask.punpckh") || // Added in 3.9
Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
Name.startswith("avx512.mask.pand.") || // Added in 3.9
Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9
Name.startswith("avx512.mask.pxor.") || // Added in 3.9
Name.startswith("avx512.mask.and.") || // Added in 3.9
Name.startswith("avx512.mask.andn.") || // Added in 3.9
Name.startswith("avx512.mask.or.") || // Added in 3.9
Name.startswith("avx512.mask.xor.") || // Added in 3.9
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
Name == "avx512.mask.add.ps.256" || // Added in 4.0
Name == "avx512.mask.div.pd.128" || // Added in 4.0
Name == "avx512.mask.div.pd.256" || // Added in 4.0
Name == "avx512.mask.div.ps.128" || // Added in 4.0
Name == "avx512.mask.div.ps.256" || // Added in 4.0
Name == "avx512.mask.mul.pd.128" || // Added in 4.0
Name == "avx512.mask.mul.pd.256" || // Added in 4.0
Name == "avx512.mask.mul.ps.128" || // Added in 4.0
Name == "avx512.mask.mul.ps.256" || // Added in 4.0
Name == "avx512.mask.sub.pd.128" || // Added in 4.0
Name == "avx512.mask.sub.pd.256" || // Added in 4.0
Name == "avx512.mask.sub.ps.128" || // Added in 4.0
Name == "avx512.mask.sub.ps.256" || // Added in 4.0
Name == "avx512.mask.max.pd.128" || // Added in 5.0
Name == "avx512.mask.max.pd.256" || // Added in 5.0
Name == "avx512.mask.max.ps.128" || // Added in 5.0
Name == "avx512.mask.max.ps.256" || // Added in 5.0
Name == "avx512.mask.min.pd.128" || // Added in 5.0
Name == "avx512.mask.min.pd.256" || // Added in 5.0
Name == "avx512.mask.min.ps.128" || // Added in 5.0
Name == "avx512.mask.min.ps.256" || // Added in 5.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
Name.startswith("avx512.mask.psra.d") || // Added in 4.0
Name.startswith("avx512.mask.psra.q") || // Added in 4.0
Name.startswith("avx512.mask.psra.w") || // Added in 4.0
Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
Name.startswith("avx512.mask.pslli") || // Added in 4.0
Name.startswith("avx512.mask.psrai") || // Added in 4.0
Name.startswith("avx512.mask.psrli") || // Added in 4.0
Name.startswith("avx512.mask.psllv") || // Added in 4.0
Name.startswith("avx512.mask.psrav") || // Added in 4.0
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
Name.startswith("sse41.pmovsx") || // Added in 3.8
Name.startswith("sse41.pmovzx") || // Added in 3.9
Name.startswith("avx2.pmovsx") || // Added in 3.9
Name.startswith("avx2.pmovzx") || // Added in 3.9
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name == "sse2.cvtdq2pd" || // Added in 3.9
Name == "sse2.cvtps2pd" || // Added in 3.9
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
Name == "sse41.movntdqa" || // Added in 5.0
Name == "avx2.movntdqa" || // Added in 5.0
Name == "avx512.movntdqa" || // Added in 5.0
Name == "sse2.storel.dq" || // Added in 3.9
Name.startswith("sse.storeu.") || // Added in 3.9
Name.startswith("sse2.storeu.") || // Added in 3.9
Name.startswith("avx.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.store.p") || // Added in 3.9
Name.startswith("avx512.mask.store.b.") || // Added in 3.9
Name.startswith("avx512.mask.store.w.") || // Added in 3.9
Name.startswith("avx512.mask.store.d.") || // Added in 3.9
Name.startswith("avx512.mask.store.q.") || // Added in 3.9
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Name.startswith("sse2.psrl.dq") || // Added in 3.7
Name.startswith("avx2.psll.dq") || // Added in 3.7
Name.startswith("avx2.psrl.dq") || // Added in 3.7
Name.startswith("avx512.psll.dq") || // Added in 3.9
Name.startswith("avx512.psrl.dq") || // Added in 3.9
Name == "sse41.pblendw" || // Added in 3.7
Name.startswith("sse41.blendp") || // Added in 3.7
Name.startswith("avx.blend.p") || // Added in 3.7
Name == "avx2.pblendw" || // Added in 3.7
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
Name == "xop.vpcmov" || // Added in 3.8
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2))
return true;
return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
if (!Name.startswith("x86."))
return false;
// Remove "x86." prefix.
Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
return true;
}
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
if (Name.substr(11) == "nzc")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
// Several blend and other instructions with masks used the wrong number of
// bits.
if (Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
if (Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
if (Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
if (Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
if (Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Idx = F->getFunctionType()->getParamType(2);
if (Idx->isFPOrFPVectorTy()) {
rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
if (EltSize == 64 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
else if (EltSize == 32 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
else if (EltSize == 64 && IdxSize == 256)
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
else
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
return true;
}
}
return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
// Quickly eliminate it if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
return false;
Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
Type::getInt1Ty(F->getContext())
};
// Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
NewFn = Function::Create(fType, F->getLinkage(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
if (Name.startswith("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
}
Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vldRegex.match(Name)) {
auto fArgs = F->getFunctionType()->params();
SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
NewFn = Function::Create(fType, F->getLinkage(),
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
static const Intrinsic::ID StoreLaneInts[] = {
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
Intrinsic::arm_neon_vst4lane
};
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
if (Name.find("lane") == StringRef::npos)
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreLaneInts[fArgs.size() - 5], Tys);
return true;
}
if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
break;
}
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
if (IsLifetimeStart || Name.startswith("invariant.start")) {
Intrinsic::ID ID = IsLifetimeStart ?
Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
bool IsLifetimeEnd = Name.startswith("lifetime.end");
if (IsLifetimeEnd || Name.startswith("invariant.end")) {
Intrinsic::ID ID = IsLifetimeEnd ?
Intrinsic::lifetime_end : Intrinsic::invariant_end;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
break;
}
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
return true;
}
}
if (Name.startswith("masked.store.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
return true;
}
}
// Rename gather/scatter intrinsics with no address space overloading to the
// new overload that includes an address space.
if (Name.startswith("masked.gather.")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_gather, Tys);
return true;
}
}
if (Name.startswith("masked.scatter.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_scatter, Tys);
return true;
}
}
break;
}
case 'n': {
if (Name.startswith("nvvm.")) {
Name = Name.substr(5);
// The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
.Cases("brev32", "brev64", Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
{F->getReturnType()});
return true;
}
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
bool Expand = StringSwitch<bool>(Name)
.Cases("abs.i", "abs.ll", true)
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
return true;
}
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 ||
F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
Tys);
return true;
}
}
break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
return true;
}
break;
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
NewFn = Result.getValue();
return true;
}
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.
return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
if (Intrinsic::ID id = F->getIntrinsicID())
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// Nothing to do yet.
return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
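// For example, a 128-bit pslldq with Shift == 4 shuffles the zero vector
// (shuffle elements 0-15) against the source (elements 16-31) with indices
// {12, 13, 14, 15, 16, ..., 27}: the low four result bytes are zero and the
// remaining twelve are the low twelve bytes of the source.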
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
Type *ResultTy = Op->getType();
unsigned NumElts = ResultTy->getVectorNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
Type *ResultTy = Op->getType();
unsigned NumElts = ResultTy->getVectorNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
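// Convert an integer mask argument to a vector of i1: bit i of the mask
// controls element i. For example, an i8 mask of 0x05 with NumElts == 4
// becomes <i1 1, i1 0, i1 1, i1 0>; the four high bits are dropped by the
// extracting shuffle.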
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
// If we have less than 8 elements, then the starting mask was an i8 and
// we need to extract down to the right number of elements.
if (NumElts < 8) {
uint32_t Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(Mask, Mask,
makeArrayRef(Indices, NumElts),
"extract");
}
return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just return the first operand unmasked.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
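// For example, a 128-bit palignr with ShiftVal == 4 emits
// shufflevector(Op1, Op0, {4, 5, ..., 15, 16, 17, 18, 19}), i.e. the upper
// twelve bytes of Op1 followed by the low four bytes of Op0, which is the
// concatenation Op0:Op1 shifted right by four bytes.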
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Value *Op1, Value *Shift,
Value *Passthru, Value *Mask,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = Op0->getType()->getVectorNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
// Mask the immediate for VALIGN.
if (IsVALIGN)
ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
if (ShiftVal >= 32)
return llvm::Constant::getNullValue(Op0->getType());
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
if (ShiftVal > 16) {
ShiftVal -= 16;
Op1 = Op0;
Op0 = llvm::Constant::getNullValue(Op0->getType());
}
uint32_t Indices[64];
// 256-bit palignr operates on 128-bit lanes, so handle each 16-byte lane
// separately.
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
Value *Align = Builder.CreateShuffleVector(Op1, Op0,
makeArrayRef(Indices, NumElts),
"palignr");
return EmitX86Select(Builder, Mask, Align, Passthru);
}
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Data->getType()));
unsigned Align =
Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedStore(Data, Ptr, Align);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = Data->getType()->getVectorNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Passthru->getType()));
unsigned Align =
Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
// If the mask is all ones just emit a regular load.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedLoad(Ptr, Align);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = Passthru->getType()->getVectorNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
ICmpInst::Predicate Pred) {
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
if (CI.getNumArgOperands() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
}
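// Lower a masked integer compare using an AVX-512 condition code:
//   0 -> eq, 1 -> lt, 2 -> le, 3 -> always false,
//   4 -> ne, 5 -> ge, 6 -> gt, 7 -> always true,
// with Signed selecting signed vs. unsigned predicates. The i1 results are
// widened to at least eight elements before being bitcast back to an
// integer mask.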
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = Op0->getType()->getVectorNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case 0: Pred = ICmpInst::ICMP_EQ; break;
case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 4: Pred = ICmpInst::ICMP_NE; break;
case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
}
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
if (NumElts < 8) {
uint32_t Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts + i % NumElts;
Cmp = Builder.CreateShuffleVector(Cmp,
Constant::getNullValue(Cmp->getType()),
Indices);
}
return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
std::max(NumElts, 8U)));
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
Intrinsic::ID IID) {
Function *F = CI.getCalledFunction();
Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
Value *Rep = Builder.CreateCall(Intrin,
{ CI.getArgOperand(0), CI.getArgOperand(1) });
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
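// Upgrade a masked scalar move: lane 0 of the result is B[0] when bit 0 of
// the mask is set and Src[0] otherwise; the remaining lanes come from A.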
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
Value* Mask = CI.getArgOperand(3);
Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value* Cmp = Builder.CreateIsNotNull(AndNode);
Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = CI.getType()->getVectorNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Nontemporal (unaligned) store of the 0'th element of the float/double
// vector.
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
Value *Extract =
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx.movnt.") ||
Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
VectorType *VTy = cast<VectorType>(Arg1->getType());
StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
VTy->getBitWidth() / 8);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Elt->getType()),
"cast");
Builder.CreateAlignedStore(Elt, BC, 1);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("sse.storeu.") ||
Name.startswith("sse2.storeu.") ||
Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Arg0 = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
Builder.CreateAlignedStore(Arg1, Arg0, 1);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx512.mask.store"))) {
// "avx512.mask.storeu." or "avx512.mask.store."
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
if (IsX86 && (Name.startswith("sse2.pcmp") ||
Name.startswith("avx2.pcmp"))) {
// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
bool CmpEq = Name[9] == 'e';
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFAdd(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFSub(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFMul(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFDiv(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
} else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
} else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvt.ps2.pd.256" ||
Name.startswith("avx512.mask.cvtdq2pd.") ||
Name.startswith("avx512.mask.cvtudq2pd."))) {
// Lossless i32/float to double conversion.
// Extract the bottom elements if necessary and convert to double vector.
Value *Src = CI->getArgOperand(0);
VectorType *SrcTy = cast<VectorType>(Src->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
Rep = CI->getArgOperand(0);
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
uint32_t ShuffleMask[2] = { 0, 1 };
Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
ShuffleMask);
}
bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
if (SInt2Double)
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
else if (UInt2Double)
Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1), CI->getArgOperand(2),
/*Aligned*/false);
} else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
} else if (IsX86 && Name.startswith("xop.vpcom")) {
Intrinsic::ID intID;
if (Name.endswith("ub"))
intID = Intrinsic::x86_xop_vpcomub;
else if (Name.endswith("uw"))
intID = Intrinsic::x86_xop_vpcomuw;
else if (Name.endswith("ud"))
intID = Intrinsic::x86_xop_vpcomud;
else if (Name.endswith("uq"))
intID = Intrinsic::x86_xop_vpcomuq;
else if (Name.endswith("b"))
intID = Intrinsic::x86_xop_vpcomb;
else if (Name.endswith("w"))
intID = Intrinsic::x86_xop_vpcomw;
else if (Name.endswith("d"))
intID = Intrinsic::x86_xop_vpcomd;
else if (Name.endswith("q"))
intID = Intrinsic::x86_xop_vpcomq;
else
llvm_unreachable("Unknown suffix");
Name = Name.substr(9); // strip off "xop.vpcom"
unsigned Imm;
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
unsigned EltNum = VecTy->getVectorNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
Rep = UndefValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
Value *SV = Builder.CreateShuffleVector(
CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
Type *EltTy = CI->getType()->getVectorElementType();
unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(Op, 1);
if (NumSrcElts == 2)
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 0, 1 });
else
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
Name.startswith("avx512.mask.broadcast.s"))) {
// Replace vp?broadcasts with a vector shuffle.
Value *Op = CI->getArgOperand(0);
unsigned NumElts = CI->getType()->getVectorNumElements();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
Constant::getNullValue(MaskTy));
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
false);
} else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psrl.dq" ||
Name == "avx2.psrl.dq")) {
// 128/256-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
Name == "avx2.psll.dq.bs" ||
Name == "avx512.psll.dq.512")) {
// 128/256/512-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
Name == "avx2.psrl.dq.bs" ||
Name == "avx512.psrl.dq.512")) {
// 128/256/512-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse41.pblendw" ||
Name.startswith("sse41.blendp") ||
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
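// Each immediate bit picks a source: a set bit selects the Op1 element
// (second-source indices are offset by NumElts), a clear bit keeps the
// Op0 element. The 8-bit immediate is reused for every group of 8
// elements.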
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
// Note that there is no guarantee that instruction lowering will actually
// produce a vinsertf128 instruction for the created shuffles. In
// particular, the 0 immediate case involves no lane changes, so it can
// be handled as a blend.
// Example of shuffle mask for 32-bit elements:
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
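// E.g. for a 512-bit source of i32 (SrcNumElts == 16) and a 128-bit
// result (DstNumElts == 4), Imm == 3 selects indices <12, 13, 14, 15>.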
// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<uint32_t, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
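// Each 2-bit immediate field selects an element within a 4-element
// group; e.g. Imm = 0x1B (0b00011011) reverses every group:
// <3, 2, 1, 0, 7, 6, 5, 4, ...>.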
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vpermil.") ||
Name == "sse2.pshuf.d" ||
Name.startswith("avx512.mask.vpermil.p") ||
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
unsigned IdxMask = ((1 << IdxSize) - 1);
SmallVector<uint32_t, 8> Idxs(NumElts);
// Look up the bits for this element, wrapping around the immediate every
// 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
// by the first index of each group.
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
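// E.g. for 32-bit elements (IdxSize == 2), Imm = 0x1B turns each
// 4-element group into <3, 2, 1, 0>; 64-bit elements (IdxSize == 1)
// consume one immediate bit per element.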
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
for (unsigned i = 4; i != 8; ++i)
Idxs[i + l] = i + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = i + l;
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// Base index is the starting element of the lane.
Idxs[i] = i - (i % NumLaneElts);
// If we are halfway through the lane, switch to the other source.
if ((i % NumLaneElts) >= HalfLaneElts)
Idxs[i] += NumElts;
// Now select the specific element by adding HalfLaneElts bits from the
// immediate, wrapping around the immediate every 8 bits.
Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
}
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned NumElts = CI->getType()->getVectorNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
if (Name.startswith("avx512.mask.movshdup."))
Offset = 1;
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
for (unsigned i = 0; i != NumLaneElts; i += 2) {
Idxs[i + l + 0] = i + l + Offset;
Idxs[i + l + 1] = i + l + Offset;
}
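// movsldup (Offset == 0) duplicates the even elements, <0, 0, 2, 2, ...>;
// movshdup (Offset == 1) duplicates the odd ones, <1, 1, 3, 3, ...>.
// movddup has 64-bit elements, so each 128-bit lane becomes <0, 0>.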
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = CI->getType()->getVectorNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<uint32_t, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
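// Interleave the low halves of each 128-bit lane: even result elements
// come from Op0, odd ones from the same position in Op1.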
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = CI->getType()->getVectorNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<uint32_t, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
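// Same interleave as punpckl, but starting at the high half of each
// 128-bit lane.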
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.por.")) {
Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.and.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
Rep = Builder.CreateAnd(Rep,
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.or.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
CI->getType()),
{ CI->getArgOperand(0), Builder.getInt1(false) });
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
Name.startswith("avx512.mask.min.p"))) {
bool IsMin = Name[13] == 'i';
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
unsigned EltWidth = VecTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (!IsMin && VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (!IsMin && VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_max_pd;
else if (!IsMin && VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_max_ps_256;
else if (!IsMin && VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_max_pd_256;
else if (IsMin && VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (IsMin && VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_min_pd;
else if (IsMin && VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_min_ps_256;
else if (IsMin && VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
Name.startswith("avx512.mask.pmulu.dq."))) {
bool IsUnsigned = Name[16] == 'u';
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse41_pmuldq;
else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pmul_dq;
else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pmul_dq_512;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_pmulu_dq;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pmulu_dq;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pmulu_dq_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pack")) {
bool IsUnsigned = Name[16] == 'u';
bool IsDW = Name[18] == 'd';
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packsswb;
else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packsswb_512;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packssdw;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packssdw_512;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packuswb;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packuswb_512;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packusdw;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
IID = Intrinsic::x86_avx2_psllv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
IID = Intrinsic::x86_avx2_psllv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
IID = Intrinsic::x86_avx2_psllv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
IID = Intrinsic::x86_avx2_psllv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
IID = Intrinsic::x86_avx512_psllv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
IID = Intrinsic::x86_avx512_psllv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
IID = Intrinsic::x86_avx512_psllv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
: Intrinsic::x86_sse2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
: Intrinsic::x86_sse2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
: Intrinsic::x86_sse2_psll_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
: Intrinsic::x86_avx2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
: Intrinsic::x86_avx2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
: Intrinsic::x86_avx2_psll_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
Intrinsic::x86_avx512_psll_d_512;
else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
Intrinsic::x86_avx512_psll_q_512;
else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
: Intrinsic::x86_avx512_psll_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
IID = Intrinsic::x86_avx2_psrlv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
IID = Intrinsic::x86_avx2_psrlv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
IID = Intrinsic::x86_avx2_psrlv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
IID = Intrinsic::x86_avx2_psrlv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
IID = Intrinsic::x86_avx512_psrlv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
IID = Intrinsic::x86_avx512_psrlv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
IID = Intrinsic::x86_avx512_psrlv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
: Intrinsic::x86_sse2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
: Intrinsic::x86_sse2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
: Intrinsic::x86_sse2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
: Intrinsic::x86_avx2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
: Intrinsic::x86_avx2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
: Intrinsic::x86_avx2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
Intrinsic::x86_avx512_psrl_d_512;
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
Intrinsic::x86_avx512_psrl_q_512;
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
: Intrinsic::x86_avx512_psrl_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psra")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
IID = Intrinsic::x86_avx2_psrav_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
IID = Intrinsic::x86_avx2_psrav_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
IID = Intrinsic::x86_avx512_psrav_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
IID = Intrinsic::x86_avx512_psrav_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
IID = Intrinsic::x86_avx512_psrav_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
: Intrinsic::x86_sse2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
Intrinsic::x86_avx512_psra_q_128;
else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
: Intrinsic::x86_sse2_psra_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
: Intrinsic::x86_avx2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
Intrinsic::x86_avx512_psra_q_256;
else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
: Intrinsic::x86_avx2_psra_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
Intrinsic::x86_avx512_psra_d_512;
else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
Intrinsic::x86_avx512_psra_q_512;
else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
: Intrinsic::x86_avx512_psra_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
Intrinsic::ID IID;
if (Name.endswith("ps.128"))
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (Name.endswith("pd.128"))
IID = Intrinsic::x86_avx_vpermilvar_pd;
else if (Name.endswith("ps.256"))
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (Name.endswith("pd.256"))
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
else if (Name.endswith("ps.512"))
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
else if (Name.endswith("pd.512"))
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected vpermilvar intrinsic");
Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
Rep = Builder.CreateCall(Intrin,
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.endswith(".movntdqa")) {
Module *M = F->getParent();
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
Value *Ptr = CI->getArgOperand(0);
VectorType *VTy = cast<VectorType>(CI->getType());
// Convert the type of the pointer to a pointer to the stored type.
Value *BC =
Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
LI->setMetadata(M->getMDKindID("nontemporal"), Node);
Rep = LI;
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
Value *Arg = CI->getArgOperand(0);
Value *Neg = Builder.CreateNeg(Arg, "neg");
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
} else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
Name == "min.ui" || Name == "min.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
} else if (IsNVVM && Name == "clz.ll") {
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Ctlz = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
{Arg->getType()}),
{Arg, Builder.getFalse()}, "ctlz");
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
} else if (IsNVVM && Name == "popc.ll") {
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Popc = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
{Arg->getType()}),
Arg, "ctpop");
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
} else if (IsNVVM && Name == "h2f") {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
if (Rep)
CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
return;
}
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
default: {
// Handle generic mangling change, but nothing else
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallInst upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
return;
}
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
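// The old two-argument form of llvm.objectsize had no 'null is unknown
// size' parameter; default it to false when upgrading.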
Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
? Builder.getFalse()
: CI->getArgOperand(2);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
break;
}
case Intrinsic::ctpop:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::convert_from_fp16:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
// The arguments for these intrinsics used to be v4f32, and changed
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
// So, the only thing required is a bitcast for both arguments.
// First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0);
if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
return;
// Old intrinsic, add bitcasts
Value *Arg1 = CI->getArgOperand(1);
Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
break;
}
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
}
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
std::string Name = CI->getName();
if (!Name.empty()) {
CI->setName(Name + ".old");
NewCall->setName(Name);
}
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Check if this function should be upgraded and get the replacement function
// if there is one.
Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
if (CallInst *CI = dyn_cast<CallInst>(*UI++))
UpgradeIntrinsicCall(CI, NewFn);
// Remove old function, no longer used, from the module.
F->eraseFromParent();
}
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
return &MD;
auto &Context = MD.getContext();
if (MD.getNumOperands() == 3) {
Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
ConstantAsMetadata::get(
Constant::getNullValue(Type::getInt64Ty(Context))),
MD.getOperand(2)};
return MDNode::get(Context, Elts2);
}
// Create a MDNode <MD, MD, offset 0>
Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
Type::getInt64Ty(Context)))};
return MDNode::get(Context, Elts);
}
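// Illustrative upgrade (metadata numbers are made up): the old scalar
// tag !0 = !{!"int", !1} becomes the access tag !{!0, !0, i64 0}, and the
// three-operand form !{!"int", !1, i64 1} becomes !{!2, !2, i64 0, i64 1}
// with !2 = !{!"int", !1}.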
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
Instruction *&Temp) {
if (Opc != Instruction::BitCast)
return nullptr;
Temp = nullptr;
Type *SrcTy = V->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = V->getContext();
// We have no information about the target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
return nullptr;
}
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
return nullptr;
Type *SrcTy = C->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = C->getContext();
// We have no information about the target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
DestTy);
}
return nullptr;
}
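// Both helpers rewrite a bitcast between pointers in different address
// spaces, e.g.
//   %q = bitcast i8 addrspace(1)* %p to i8 addrspace(2)*
// into a ptrtoint/inttoptr pair through i64:
//   %t = ptrtoint i8 addrspace(1)* %p to i64
//   %q = inttoptr i64 %t to i8 addrspace(2)*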
/// Check the debug info version number; if it is out of date, drop the
/// debug info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
unsigned Version = getDebugMetadataVersionFromModule(M);
if (Version == DEBUG_METADATA_VERSION)
return false;
bool RetCode = StripDebugInfo(M);
if (RetCode) {
DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
M.getContext().diagnose(DiagVersion);
}
return RetCode;
}
bool llvm::UpgradeModuleFlags(Module &M) {
- const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+ NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
- bool HasObjCFlag = false, HasClassProperties = false;
+ bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
- if (Op->getNumOperands() < 2)
+ if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
continue;
if (ID->getString() == "Objective-C Image Info Version")
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
+ // Upgrade the PIC/PIE module flags. The module flag behavior for these
+ // two flags used to be Error and is now Max.
+ if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
+ if (auto *Behavior =
+ mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
+ if (Behavior->getLimitedValue() == Module::Error) {
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Metadata *Ops[3] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
+ MDString::get(M.getContext(), ID->getString()),
+ Op->getOperand(2)};
+ ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
+ Changed = true;
+ }
+ }
+ }
}
+
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correclty downgrade this flag when trying to
// link an ObjC bitcode without this module flag with an ObjC bitcode with
// this module flag.
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
- return true;
+ Changed = true;
}
- return false;
+
+ return Changed;
}
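// E.g. assuming the in-tree values Module::Error == 1 and Module::Max == 7,
// the flag !{i32 1, !"PIC Level", i32 2} is rewritten to
// !{i32 7, !"PIC Level", i32 2}.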
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return false;
if (T->getNumOperands() < 1)
return false;
auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!S)
return false;
return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
StringRef OldPrefix = "llvm.vectorizer.";
assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
if (OldTag == "llvm.vectorizer.unroll")
return MDString::get(C, "llvm.loop.interleave.count");
return MDString::get(
C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
.str());
}
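// E.g. "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width", while
// "llvm.vectorizer.unroll" is special-cased to "llvm.loop.interleave.count".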
static Metadata *upgradeLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return MD;
if (T->getNumOperands() < 1)
return MD;
auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!OldTag)
return MD;
if (!OldTag->getString().startswith("llvm.vectorizer."))
return MD;
// This has an old tag. Upgrade it.
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
Ops.push_back(T->getOperand(I));
return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
auto *T = dyn_cast<MDTuple>(&N);
if (!T)
return &N;
if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
for (Metadata *MD : T->operands())
Ops.push_back(upgradeLoopArgument(MD));
return MDTuple::get(T->getContext(), Ops);
}
Index: head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp
===================================================================
--- head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp (revision 322854)
+++ head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp (revision 322855)
@@ -1,331 +1,337 @@
//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
//
// The format of module-definition files is described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
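// A minimal example (illustrative only):
//
//   LIBRARY foo.dll
//   EXPORTS
//     bar
//     baz = real_baz @1 NONAME DATA
//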
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::COFF;
using namespace llvm;
namespace llvm {
namespace object {
enum Kind {
Unknown,
Eof,
Identifier,
Comma,
Equal,
KwBase,
KwConstant,
KwData,
KwExports,
KwHeapsize,
KwLibrary,
KwName,
KwNoname,
KwPrivate,
KwStacksize,
KwVersion,
};
struct Token {
explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
Kind K;
StringRef Value;
};
static bool isDecorated(StringRef Sym, bool MingwDef) {
// mingw does not prepend "_".
return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") ||
Sym.startswith("?");
}
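// E.g. "_cdecl_sym", "@fastcall_sym@8" and "?cxx_mangled" all count as
// decorated; mingw .def files list cdecl symbols without the leading "_",
// so "_" is not treated as decoration there.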
static Error createError(const Twine &Err) {
return make_error<StringError>(StringRef(Err.str()),
object_error::parse_failed);
}
class Lexer {
public:
Lexer(StringRef S) : Buf(S) {}
Token lex() {
Buf = Buf.trim();
if (Buf.empty())
return Token(Eof);
switch (Buf[0]) {
case '\0':
return Token(Eof);
case ';': {
size_t End = Buf.find('\n');
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return lex();
}
case '=':
Buf = Buf.drop_front();
// GNU dlltool accepts both = and ==.
if (Buf.startswith("="))
Buf = Buf.drop_front();
return Token(Equal, "=");
case ',':
Buf = Buf.drop_front();
return Token(Comma, ",");
case '"': {
StringRef S;
std::tie(S, Buf) = Buf.substr(1).split('"');
return Token(Identifier, S);
}
default: {
size_t End = Buf.find_first_of("=,\r\n \t\v");
StringRef Word = Buf.substr(0, End);
Kind K = llvm::StringSwitch<Kind>(Word)
.Case("BASE", KwBase)
.Case("CONSTANT", KwConstant)
.Case("DATA", KwData)
.Case("EXPORTS", KwExports)
.Case("HEAPSIZE", KwHeapsize)
.Case("LIBRARY", KwLibrary)
.Case("NAME", KwName)
.Case("NONAME", KwNoname)
.Case("PRIVATE", KwPrivate)
.Case("STACKSIZE", KwStacksize)
.Case("VERSION", KwVersion)
.Default(Identifier);
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return Token(K, Word);
}
}
}
private:
StringRef Buf;
};
class Parser {
public:
explicit Parser(StringRef S, MachineTypes M, bool B)
: Lex(S), Machine(M), MingwDef(B) {}
Expected<COFFModuleDefinition> parse() {
do {
if (Error Err = parseOne())
return std::move(Err);
} while (Tok.K != Eof);
return Info;
}
private:
void read() {
if (Stack.empty()) {
Tok = Lex.lex();
return;
}
Tok = Stack.back();
Stack.pop_back();
}
Error readAsInt(uint64_t *I) {
read();
if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
return createError("integer expected");
return Error::success();
}
Error expect(Kind Expected, StringRef Msg) {
read();
if (Tok.K != Expected)
return createError(Msg);
return Error::success();
}
void unget() { Stack.push_back(Tok); }
Error parseOne() {
read();
switch (Tok.K) {
case Eof:
return Error::success();
case KwExports:
for (;;) {
read();
if (Tok.K != Identifier) {
unget();
return Error::success();
}
if (Error Err = parseExport())
return Err;
}
case KwHeapsize:
return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
case KwStacksize:
return parseNumbers(&Info.StackReserve, &Info.StackCommit);
case KwLibrary:
case KwName: {
bool IsDll = Tok.K == KwLibrary; // Check before parseName.
std::string Name;
if (Error Err = parseName(&Name, &Info.ImageBase))
return Err;
Info.ImportName = Name;
// Set the output file, but don't override /out if it was already passed.
if (Info.OutputFile.empty()) {
Info.OutputFile = Name;
// Append the appropriate file extension if not already present.
if (!sys::path::has_extension(Name))
Info.OutputFile += IsDll ? ".dll" : ".exe";
}
return Error::success();
}
case KwVersion:
return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
default:
return createError("unknown directive: " + Tok.Value);
}
}
Error parseExport() {
COFFShortExport E;
E.Name = Tok.Value;
read();
if (Tok.K == Equal) {
read();
if (Tok.K != Identifier)
return createError("identifier expected, but got " + Tok.Value);
E.ExtName = E.Name;
E.Name = Tok.Value;
} else {
unget();
}
if (Machine == IMAGE_FILE_MACHINE_I386) {
if (!isDecorated(E.Name, MingwDef))
E.Name = (std::string("_").append(E.Name));
if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
E.ExtName = (std::string("_").append(E.ExtName));
}
for (;;) {
read();
if (Tok.K == Identifier && Tok.Value[0] == '@') {
- Tok.Value.drop_front().getAsInteger(10, E.Ordinal);
+ if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
+ // Not an ordinal modifier at all, but the next export (fastcall
+ // decorated) - complete the current one.
+ unget();
+ Info.Exports.push_back(E);
+ return Error::success();
+ }
read();
if (Tok.K == KwNoname) {
E.Noname = true;
} else {
unget();
}
continue;
}
if (Tok.K == KwData) {
E.Data = true;
continue;
}
if (Tok.K == KwConstant) {
E.Constant = true;
continue;
}
if (Tok.K == KwPrivate) {
E.Private = true;
continue;
}
unget();
Info.Exports.push_back(E);
return Error::success();
}
}
// HEAPSIZE/STACKSIZE reserve[,commit]
Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
if (Error Err = readAsInt(Reserve))
return Err;
read();
if (Tok.K != Comma) {
unget();
Commit = nullptr;
return Error::success();
}
if (Error Err = readAsInt(Commit))
return Err;
return Error::success();
}
// NAME outputPath [BASE=address]
Error parseName(std::string *Out, uint64_t *Baseaddr) {
read();
if (Tok.K == Identifier) {
*Out = Tok.Value;
} else {
*Out = "";
unget();
return Error::success();
}
read();
if (Tok.K == KwBase) {
if (Error Err = expect(Equal, "'=' expected"))
return Err;
if (Error Err = readAsInt(Baseaddr))
return Err;
} else {
unget();
*Baseaddr = 0;
}
return Error::success();
}
// VERSION major[.minor]
Error parseVersion(uint32_t *Major, uint32_t *Minor) {
read();
if (Tok.K != Identifier)
return createError("identifier expected, but got " + Tok.Value);
StringRef V1, V2;
std::tie(V1, V2) = Tok.Value.split('.');
if (V1.getAsInteger(10, *Major))
return createError("integer expected, but got " + Tok.Value);
if (V2.empty())
*Minor = 0;
else if (V2.getAsInteger(10, *Minor))
return createError("integer expected, but got " + Tok.Value);
return Error::success();
}
Lexer Lex;
Token Tok;
std::vector<Token> Stack;
MachineTypes Machine;
COFFModuleDefinition Info;
bool MingwDef;
};
Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
MachineTypes Machine,
bool MingwDef) {
return Parser(MB.getBuffer(), Machine, MingwDef).parse();
}
} // namespace object
} // namespace llvm
Index: head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 322854)
+++ head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 322855)
@@ -1,14085 +1,14101 @@
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "arm-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
"Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
"constant pools"),
cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
"arm-promote-constant-max-size", cl::Hidden,
cl::desc("Maximum size of constant to promote into a constant pool"),
cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
"arm-promote-constant-max-total", cl::Hidden,
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPairRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
const ARMSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
RegInfo = Subtarget->getRegisterInfo();
Itins = Subtarget->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
!Subtarget->isTargetWatchOS()) {
const auto &E = Subtarget->getTargetTriple().getEnvironment();
bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
E == Triple::MuslEABIHF;
// Windows is a special case. Technically, we will replace all of the "GNU"
// calls with calls to MSVCRT if appropriate and adjust the calling
// convention then.
IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
IsHFTarget ? CallingConv::ARM_AAPCS_VFP
: CallingConv::ARM_AAPCS);
}
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Single-precision floating-point arithmetic.
{ RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
// Double-precision floating-point arithmetic.
{ RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
// Single-precision comparisons.
{ RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
{ RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
{ RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
{ RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
{ RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
{ RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
{ RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
{ RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
// Double-precision comparisons.
{ RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
{ RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
{ RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
{ RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
{ RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
{ RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
{ RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
{ RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
// Floating-point to integer conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
{ RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
// Conversions between floating types.
{ RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
// FIXME: There appears to be some naming inconsistency in ARM libgcc:
// e.g., __floatunsidf vs. __floatunssidfvfp.
{ RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
// Set the correct calling convention for ARMv7k WatchOS: even functions as
// simple as libcalls use AAPCS_VFP.
if (Subtarget->isTargetWatchABI()) {
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
}
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
// RTABI run-time helper libcalls (AEABI targets).
if (Subtarget->isAAPCS_ABI() &&
(Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
{ RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Double-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 3
{ RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
{ RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Single-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 5
{ RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
{ RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// RTABI chapter 4.1.2, Table 8
{ RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Long long helper functions
// RTABI chapter 4.2, Table 9
{ RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer division functions
// RTABI chapter 4.3.1
{ RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
// EABI dependent RTLIB
if (TM.Options.EABIVersion == EABI::EABI4 ||
TM.Options.EABIVersion == EABI::EABI5) {
static const struct {
const RTLIB::Libcall Op;
const char *const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} MemOpsLibraryCalls[] = {
// Memory operations
// RTABI chapter 4.3.4
{ RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : MemOpsLibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
}
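// With the tables above, an ordered double-precision compare such as
// (a < b) is lowered roughly as
//   int __aeabi_dcmplt(double a, double b);  // returns 1 if a < b, else 0
//   ... setcc ne (__aeabi_dcmplt(a, b)), 0 ...
// i.e. the SETNE entry tells the legalizer how to interpret the libcall's
// integer result.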
if (Subtarget->isTargetWindows()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
if (Subtarget->isTargetMachO() &&
!(Subtarget->isTargetIOS() &&
Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
// The half <-> float conversion functions are always soft-float on
// non-watchOS platforms, but are needed for some targets which use a
// hard-float calling convention by default.
if (!Subtarget->isTargetWatchABI()) {
if (Subtarget->isAAPCS_ABI()) {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
} else {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
}
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
// a __gnu_ prefix (which is the default).
if (Subtarget->isTargetAEABI()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
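// For example, a float-to-half truncation on an AEABI target becomes a
// call to
//   unsigned short __aeabi_f2h(float);   // RTABI half-precision helper
// whereas other targets keep the default __gnu_f2h_ieee name set by RTLIB.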
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
for (MVT VT : MVT::vector_valuetypes()) {
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither NEON nor VFP support any arithmetic operations on it. Many of the
// same operations are expanded for v4f32 below as well, though keep in mind
// that vadd, vsub and vmul are natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
// FIXME: Code duplication: FDIV and FREM are expanded always, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
// FIXME: Create unittest.
// In other words, find a case where "copysign" appears in the DAG with
// vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
// FIXME: Code duplication: SETCC has custom operation action, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
// FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
setOperationAction(ISD::FMA, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Expand the v2f32 forms of these intrinsics as well.
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Custom handling for some vector types to avoid expensive expansions
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
// NEON does not have single-instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// NEON does not have single instruction CTTZ for vectors.
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
// NEON only has FMA instructions as of VFP4.
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
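// For instance, a v4i8 -> v4i32 extending load can be selected as a narrow
// load plus widening moves, roughly:
//   vld1.32   {d0[0]}, [r0]   ; load four i8 lanes
//   vmovl.u8  q0, d0          ; widen i8  -> i16
//   vmovl.u16 q0, d0          ; widen i16 -> i32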
}
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present. However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FDIV, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
setOperationAction(ISD::FNEG, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
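// On such an FPU (e.g. the FPv4-SP found on Cortex-M4F cores), an f64 add
// therefore ends up as a soft-float libcall, roughly
//   double __aeabi_dadd(double, double);   // used instead of vadd.f64
// on AEABI targets, while f64 loads, stores and moves still use the
// hardware.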
computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
}
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have an i1 sign-extending load.
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i1, Legal);
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i1, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
} else {
// Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}
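// These indexed modes correspond to ARM addressing forms such as
//   ldr r0, [r1, #4]!   ; pre-indexed:  r1 += 4, then load
//   ldr r0, [r1], #4    ; post-indexed: load, then r1 += 4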
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
|| (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
// FIXME: Technically there are older ARM CPUs that have
// implementation-specific ways of obtaining this information.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the CPU doesn't have a hardware divider.
setOperationAction(ISD::SDIV, MVT::i32, LibCall);
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::SDIV, MVT::i64, Custom);
setOperationAction(ISD::UDIV, MVT::i64, Custom);
}
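// The Windows paths are Custom rather than LibCall because the lowering
// must also emit an explicit divide-by-zero check (ARMISD::WIN__DBZCHK)
// before entering the runtime, conceptually:
//   if (divisor == 0) trap;                        // WIN__DBZCHK
//   {quot, rem} = __rt_sdiv(divisor, dividend);    // divisor passed first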
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
Subtarget->isTargetWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
if (Subtarget->isTargetWindows()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
} else {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
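// For example, on AEABI targets the pair
//   int q = a / b;
//   int r = a % b;
// becomes a single call to __aeabi_idivmod, which returns the quotient in
// r0 and the remainder in r1 (RTABI 4.3.1).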
if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
for (auto &VT : {MVT::f32, MVT::f64})
setOperationAction(ISD::FPOWI, VT, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
InsertFencesForAtomic = false;
if (Subtarget->hasAnyDataBarrier() &&
(!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
if (!Subtarget->isThumb() || !Subtarget->isMClass())
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
// If the target has DMB in Thumb mode, fences can be inserted.
if (Subtarget->hasDataBarrier())
InsertFencesForAtomic = true;
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
if (!InsertFencesForAtomic) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
}
}
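// With fence insertion enabled, a seq_cst atomic store is bracketed by
// barriers, roughly:
//   dmb ish          ; fence before the store
//   str r1, [r0]
//   dmb ish          ; fence after the store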
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
// Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
// iff the target supports VFP2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// Thumb-1 cannot currently select ARMISD::SUBE.
if (!Subtarget->isThumb1Only())
setOperationAction(ISD::SETCCE, MVT::i32, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
// We don't support sin/cos/fmod/copysign/pow
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
// Various VFP goodness
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. On anything earlier it must be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetWatchABI()) {
setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
}
if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
// For iOS, we don't want the normal expansion of a libcall to sincos;
// we want to issue a libcall to __sincos_stret instead.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
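// This computes both results with one call, conceptually
//   { double sin, cos } = __sincos_stret(x);
// rather than separate calls to sin() and cos(); on ARM the pair is
// returned indirectly via an sret pointer.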
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
if (!Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
}
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
!Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross-class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as the representative register class for all floating-point and
// vector types. Since there are 32 SPR registers and 32 DPR registers, the
// cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
// coalescing by double-counting the SP regs. See the FIXME above.
if (Subtarget->useNEONForSinglePrecisionFP())
Cost = 2;
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
return std::make_pair(RRC, Cost);
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((ARMISD::NodeType)Opcode) {
case ARMISD::FIRST_NUMBER: break;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
case ARMISD::ADDC: return "ARMISD::ADDC";
case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC";
case ARMISD::SUBE: return "ARMISD::SUBE";
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
case ARMISD::VSHL: return "ARMISD::VSHL";
case ARMISD::VSHRs: return "ARMISD::VSHRs";
case ARMISD::VSHRu: return "ARMISD::VSHRu";
case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VSLI: return "ARMISD::VSLI";
case ARMISD::VSRI: return "ARMISD::VSRI";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
case ARMISD::VREV64: return "ARMISD::VREV64";
case ARMISD::VREV32: return "ARMISD::VREV32";
case ARMISD::VREV16: return "ARMISD::VREV16";
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::VTBL1: return "ARMISD::VTBL1";
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
case ARMISD::SMULWB: return "ARMISD::SMULWB";
case ARMISD::SMULWT: return "ARMISD::SMULWT";
case ARMISD::SMLALD: return "ARMISD::SMLALD";
case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
return nullptr;
}
EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
return VT.changeVectorElementTypeToInteger();
}
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::QQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
// memcpy and other memory intrinsics typically try to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
unsigned &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
// cycle faster than 4-byte aligned LDM.
PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
return true;
}
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return ARM::createFastISel(funcInfo, libInfo);
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
return Sched::RegPressure;
for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
return Sched::ILP;
}
if (!N->isMachineOpcode())
return Sched::RegPressure;
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::ILP;
return Sched::RegPressure;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
static bool isSRL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSRA16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRA)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSHL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SHL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
// Check for a signed 16-bit value. We special-case SRA because doing so
// keeps things simpler when also looking for SRAs that aren't sign-extending
// a smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
if (isSRA16(Op))
return isSHL16(Op.getOperand(0));
return DAG.ComputeNumSignBits(Op) == 17;
}
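// e.g. in isS16 above: (sra (shl x, 16), 16) explicitly sign-extends a
// 16-bit value, and an i32 with ComputeNumSignBits == 17 has its top 17
// bits equal to the sign bit, i.e. it also fits in 16 bits. Both forms can
// feed the signed 16-bit multiply patterns (e.g. ARMISD::SMLALBB) formed
// later.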
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return ARMCC::NE;
case ISD::SETEQ: return ARMCC::EQ;
case ISD::SETGT: return ARMCC::GT;
case ISD::SETGE: return ARMCC::GE;
case ISD::SETLT: return ARMCC::LT;
case ISD::SETLE: return ARMCC::LE;
case ISD::SETUGT: return ARMCC::HI;
case ISD::SETUGE: return ARMCC::HS;
case ISD::SETULT: return ARMCC::LO;
case ISD::SETULE: return ARMCC::LS;
}
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = ARMCC::EQ;
InvalidOnQNaN = false;
break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
case ISD::SETONE:
CondCode = ARMCC::MI;
CondCode2 = ARMCC::GT;
InvalidOnQNaN = false;
break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
case ISD::SETUEQ:
CondCode = ARMCC::EQ;
CondCode2 = ARMCC::VS;
InvalidOnQNaN = false;
break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = ARMCC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = ARMCC::NE;
InvalidOnQNaN = false;
break;
}
}
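// For example, SETONE (ordered and not equal) has no single ARM condition,
// so it is tested as two branches on the VFP flags:
//   vcmp.f64 d0, d1
//   vmrs     APSR_nzcv, fpscr
//   bmi      taken            ; ordered and a < b
//   bgt      taken            ; ordered and a > b
// An unordered compare clears N and sets V, so neither MI nor GT is taken.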
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "ARMGenCallingConv.inc"
/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
case CallingConv::GHC:
return CC;
case CallingConv::PreserveMost:
return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::Swift:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
} else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
}
}
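// For example, a variadic callee on a hard-float AAPCS target still gets
// the base convention from getEffectiveCallingConv: AAPCS_VFP would pass
// floats in s0-s15, which va_arg cannot retrieve, so isVarArg forces
// ARM_AAPCS above.
//   double sum(int n, ...);   // effective CC: ARM_AAPCS, not AAPCS_VFP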
CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, false, isVarArg);
}
CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, true, isVarArg);
}
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool Return,
bool isVarArg) const {
switch (getEffectiveCallingConv(CC, isVarArg)) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_AAPCS_VFP:
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
case CallingConv::Fast:
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
case CallingConv::PreserveMost:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(0, dl, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, dl, MVT::i32));
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
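// For illustration (an assumed soft-float example): a call returning f64
// yields two i32 RVLocs, typically r0/r1; the loop above copies both halves
// out of their physregs and rebuilds the f64 with ARMISD::VMOVDRR, swapping
// the halves first on big-endian subtargets.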
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
unsigned id = Subtarget->isLittle() ? 0 : 1;
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
Flags));
}
}
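// For example (an assumed register assignment): an f64 argument landing in
// r2/r3 is split by ARMISD::VMOVRRD; on little-endian targets the low word
// goes to VA's register (r2) and the high word to NextVA's (r3). If only one
// register remains, the other half instead becomes a stack store via
// LowerMemOpCallTo.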
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool doesNotRet = CLI.DoesNotReturn;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
isSibCall = true;
}
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
// f64 and v2f64 might be passed in i32 pairs and must be split into pieces
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, dl, MVT::i32));
SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, dl, MVT::i32));
PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
VA = ArgLocs[++i]; // skip ahead to next loc
if (VA.isRegLoc()) {
PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
dl, DAG, VA, Flags));
}
} else {
PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i32) {
assert(VA.getLocVT() == MVT::i32 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
"unexpected use of 'returned'");
isThisReturn = true;
}
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
unsigned offset = 0;
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
unsigned RegBegin, RegEnd;
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
EVT PtrVT =
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
// If the parameter size exceeds the register area, the "offset" value
// helps us compute the stack slot for the remaining part properly.
offset = RegEnd - RegBegin;
CCInfo.nextInRegsParam();
}
if (Flags.getByValSize() > 4*offset) {
auto PtrVT = getPointerTy(DAG.getDataLayout());
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops));
}
} else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
// Do not glue the preceding CopyToReg nodes together with the following ones.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool isDirect = false;
const TargetMachine &TM = getTargetMachine();
const Module *Mod = MF.getFunction()->getParent();
const GlobalValue *GV = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
GV = G->getGlobal();
bool isStub =
!TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
"long-calls codegen is not position independent!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
if (isa<GlobalAddressSDNode>(Callee)) {
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
} else if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS->getParent();
bool PreferIndirect =
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
if (!PreferIndirect) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
/* Alignment = */ 0, MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
TargetFlags);
if (GV->hasDLLImportStorageClass())
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
} else {
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
}
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
}
}
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = ARMISD::CALL;
} else {
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
!MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals, isThisReturn,
isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
// Byval (as with any stack) slots are always at least 4 byte aligned.
Align = std::max(Align, 4U);
unsigned Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
for (unsigned i = 0; i < Waste; ++i)
Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
unsigned Excess = 4 * (ARM::R4 - Reg);
// Special case when the NSAA (AAPCS Next Stacked Argument Address) != SP and
// the parameter size is greater than the size of all the remaining GPRs. In
// that case we can't split the parameter; we must send it to the stack. We
// must also set the NCRN (Next Core Register Number) to R4, wasting all of
// the remaining registers.
const unsigned NSAAOffset = State->getNextStackOffset();
if (NSAAOffset != 0 && Size > Excess) {
while (State->AllocateReg(GPRArgRegs))
;
return;
}
// The first register for the byval parameter is the first register that
// wasn't allocated before this method call, i.e. "reg".
// If the parameter is small enough to fit in the range [reg, r4), the end
// (one past the last) register is reg + param-size-in-regs; otherwise the
// parameter is split between registers and the stack, and the end register
// is r4 in that case.
unsigned ByValRegBegin = Reg;
unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
// Note that the first register was already allocated at the beginning of
// this method, so allocate only the remaining registers we need.
for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
State->AllocateReg(GPRArgRegs);
// A byval parameter that is split between registers and memory needs its
// size truncated here.
// In the case where the entire structure fits in registers, we set the
// size in memory to zero.
Size = std::max<int>(Size - Excess, 0);
}
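// Worked example (assumed values): a 12-byte byval with 8-byte alignment,
// with r0 already taken. AllocateReg returns r1; AlignInRegs == 2, so r1 is
// wasted and Reg becomes r2. Excess == 8, so r2/r3 hold the first 8 bytes,
// ByValRegEnd == r4, and Size is truncated to the 4 bytes that spill to the
// stack.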
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;
assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
assert(Subtarget->supportsTailCall());
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
if (CallerF->hasFnAttribute("interrupt"))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Check that the call results are passed in the same way.
LLVMContext &C = *DAG.getContext();
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
CCAssignFnForReturn(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (VA.needsCustom()) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
// out what to do about this.
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
}
} else if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}
return true;
}
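// Illustrative eligible case (a simplified assumption): caller and callee
// share a calling convention, nothing is passed on the stack, and neither
// side uses sret:
//   int callee(int);
//   int caller(int x) { return callee(x + 1); } // emitted as "b callee"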
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
const SDLoc &DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
// See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
// version of the "preferred return address". These offsets affect the return
// instruction if this is a return from PL1 without hypervisor extensions.
// IRQ/FIQ: +4 "subs pc, lr, #4"
// SWI: 0 "subs pc, lr, #0"
// ABORT: +4 "subs pc, lr, #4"
// UNDEF: +4/+2 "subs pc, lr, #0"
// UNDEF varies depending on whether the exception came from ARM or Thumb
// mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
int64_t LROffset;
if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
IntKind == "ABORT")
LROffset = 4;
else if (IntKind == "SWI" || IntKind == "UNDEF")
LROffset = 0;
else
report_fatal_error("Unsupported interrupt attribute. If present, value "
"must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
RetOps.insert(RetOps.begin() + 1,
DAG.getConstant(LROffset, DL, MVT::i32, false));
return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
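// E.g. (an assumed use): a handler marked __attribute__((interrupt("IRQ")))
// returns with "subs pc, lr, #4", restoring PC and CPSR in a single
// instruction, per the offset table above.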
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
bool isLittleEndian = Subtarget->isLittle();
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
AFI->setReturnRegsCount(RVLocs.size());
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, dl, MVT::i32));
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(isLittleEndian ? 0 : 1),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(isLittleEndian ? 1 : 0),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, dl, MVT::i32));
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
fmrrd.getValue(isLittleEndian ? 0 : 1),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
fmrrd.getValue(isLittleEndian ? 1 : 0),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
// Glue all emitted copies together so that nothing can be
// scheduled in between them.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (ARM::GPRRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i32));
else if (ARM::DPRRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
// CPUs which aren't M-class use a special sequence to return from
// exceptions (roughly, any instruction setting pc and cpsr simultaneously,
// though we use "subs pc, lr, #N").
//
// M-class CPUs actually use a normal return sequence with a special
// (hardware-provided) value in LR, so the normal code path works.
if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
!Subtarget->isMClass()) {
if (Subtarget->isThumb1Only())
report_fatal_error("interrupt attribute is not supported in Thumb1");
return LowerInterruptReturn(RetOps, dl, DAG);
}
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
Copies.insert(*UI);
}
if (Copies.size() > 2)
return false;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
SDValue UseChain = UI->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
else {
// We are at the top of this chain.
// If the copy has a glue operand, we conservatively assume it
// isn't safe to perform a tail call.
if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
// First CopyToReg
TCChain = UseChain;
}
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
if (!Copy->hasOneUse())
return false;
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else {
return false;
}
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ARMISD::RET_FLAG &&
UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
return true;
}
// Writing a 64-bit value requires splitting it into two 32-bit values first,
// then passing the low and high parts through.
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue WriteValue = Op->getOperand(2);
// This function is only supposed to be called for i64 type argument.
assert(WriteValue.getValueType() == MVT::i64
&& "LowerWRITE_REGISTER called for non-i64 type argument.");
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
DAG.getConstant(1, DL, MVT::i32));
SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
}
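// Sketch of the effect (assumed intrinsic use): an llvm.write_register call
// with an i64 operand becomes a single ISD::WRITE_REGISTER node whose
// operands carry the low and high i32 halves.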
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
// When generating execute-only code Constant Pools must be promoted to the
// global data section. It's a bit ugly that we can't share them across basic
// blocks, but this way we guarantee that execute-only behaves correctly with
// position-independent addressing modes.
if (Subtarget->genExecuteOnly()) {
auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
auto T = const_cast<Type*>(CP->getType());
auto C = const_cast<Constant*>(CP->getConstVal());
auto M = const_cast<Module*>(DAG.getMachineFunction().
getFunction()->getParent());
auto GV = new GlobalVariable(
*M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
Twine(AFI->createPICLabelUId())
);
SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
dl, PtrVT);
return LowerGlobalAddress(GA, DAG);
}
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
else
Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
CP->getAlignment());
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
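// Under execute-only the net effect (a sketch) is that a constant which
// would have gone into a per-function constant pool instead becomes an
// internal global named "<private-prefix>CP<function-number>_<uid>" in the
// data section, and is then addressed through LowerGlobalAddress.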
unsigned ARMTargetLowering::getJumpTableEncoding() const {
return MachineJumpTableInfo::EK_Inline;
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
SDLoc DL(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue CPAddr;
bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
if (!IsPositionIndependent)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
/// \brief Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address for Darwin, and return an
/// SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i32] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first word, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "r0".
///
/// Since this descriptor may be in a different unit, in general access must
/// proceed along the usual ARM rules. A common sequence to produce is:
///
/// movw rT1, :lower16:_var$non_lazy_ptr
/// movt rT1, :upper16:_var$non_lazy_ptr
/// ldr r0, [rT1]
/// ldr rT2, [r0]
/// blx rT2
/// [...address now in r0...]
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
SDLoc DL(Op);
// The first step is to get the address of the actual global symbol. This is
// where the TLS descriptor lives.
SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
MVT::i32, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
/* Alignment = */ 4,
MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
MachineFrameInfo &MFI = F.getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
// silly).
auto TRI =
getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
// Finally, we can make the call. This is just a degenerate version of a
// normal ARM call node: r0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
Chain =
DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
// Load the current TEB (thread environment block)
SDValue Ops[] = {Chain,
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(2, DL, MVT::i32)};
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
SDValue TEB = CurrentTEB.getValue(0);
Chain = CurrentTEB.getValue(1);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x2c from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
// The pointer to this thread's TLS data area lives at the TLS index,
// scaled by 4, as an offset into the TLS array.
// Load the TLS index from the C runtime
SDValue TLSIndex =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(2, DL, MVT::i32));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
// Get the offset of the start of the .tls section (section base)
const auto *GA = cast<GlobalAddressSDNode>(Op);
auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
SDValue Offset = DAG.getLoad(
PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
DAG.getTargetConstantPool(CPV, PtrVT, 4)),
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
}
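// Rough instruction sketch of the sequence above (assumed register names):
//   mrc p15, #0, r0, c13, c0, #2   ; TEB
//   ldr r1, [r0, #0x2c]            ; ThreadLocalStoragePointer
//   ldr r2, =_tls_index
//   ldr r2, [r2]
//   ldr r1, [r1, r2, lsl #2]       ; this module's TLS block
//   add r0, r1, #sect_offset       ; SECREL offset of the variable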
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
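// The emitted sequence is roughly (a sketch, assuming Thumb with PCAdj 4):
//   ldr  r0, .LCPI          ; TLSGD constant pool entry
// .LPC:
//   add  r0, pc             ; ARMISD::PIC_ADD
//   bl   __tls_get_addr     ; variable's address returned in r0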
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (model == TLSModel::InitialExec) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else {
// local exec model
assert(model == TLSModel::LocalExec);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
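// Net effect (a sketch): for local exec the variable's TPOFF is loaded from
// the constant pool and added to the thread pointer directly; initial exec
// inserts an extra PIC add plus one more load to fetch the TP-relative
// offset through the GOT before the same final add.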
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerGlobalTLSAddressWindows(Op, DAG);
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic:
return LowerToTLSGeneralDynamicModel(GA, DAG);
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModels(GA, DAG, model);
}
llvm_unreachable("bogus TLS model");
}
/// Return true if all users of V are within function F, looking through
/// ConstantExprs.
static bool allUsersAreInFunction(const Value *V, const Function *F) {
SmallVector<const User*,4> Worklist;
for (auto *U : V->users())
Worklist.push_back(U);
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
for (auto *UU : U->users())
Worklist.push_back(UU);
continue;
}
auto *I = dyn_cast<Instruction>(U);
if (!I || I->getParent()->getParent() != F)
return false;
}
return true;
}
/// Return true if all users of V are within some (any) function, looking
/// through ConstantExprs. In other words, return false if V has any global
/// constant users.
static bool allUsersAreInFunctions(const Value *V) {
SmallVector<const User*,4> Worklist;
for (auto *U : V->users())
Worklist.push_back(U);
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
for (auto *UU : U->users())
Worklist.push_back(UU);
continue;
}
if (!isa<Instruction>(U))
return false;
}
return true;
}
// Return true if T is an integer, float or an array/vector of either.
static bool isSimpleType(Type *T) {
if (T->isIntegerTy() || T->isFloatingPointTy())
return true;
Type *SubT = nullptr;
if (T->isArrayTy())
SubT = T->getArrayElementType();
else if (T->isVectorTy())
SubT = T->getVectorElementType();
else
return false;
return SubT->isIntegerTy() || SubT->isFloatingPointTy();
}
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
// to save ourselves an indirection.
//
// This is a win if the constant is only used in one function (so it doesn't
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
const Function *F = DAG.getMachineFunction().getFunction();
// We rely on this decision to inline being idempotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
// inline it elsewhere too (and reuse the constant pool entry). Fast-isel
// doesn't know about this optimization, so bail out if it's enabled;
// otherwise we could decide to inline here (and thus never emit the GV) but
// require the GV from fast-isel generated code.
if (!EnableConstpoolPromotion ||
DAG.getMachineFunction().getTarget().Options.EnableFastISel)
return SDValue();
auto *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar || !GVar->hasInitializer() ||
!GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
!GVar->hasLocalLinkage())
return SDValue();
// Ensure that we don't try and inline any type that contains pointers. If
// we inline a value that contains relocations, we move the relocations from
// .data to .text which is not ideal.
auto *Init = GVar->getInitializer();
if (!isSimpleType(Init->getType()))
return SDValue();
// The constant islands pass can only really deal with alignment requests
// <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
// any type with an alignment requirement greater than 4 bytes. We also
// can only promote constants that are multiples of 4 bytes in size or
// are paddable to a multiple of 4. Currently we only try to pad constants
// that are strings, for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
unsigned Align = GVar->getAlignment();
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
Size == 0)
return SDValue();
unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't bloat the constant pool too much, else the ConstantIslands pass
// may fail to converge. If we haven't promoted this global yet (it may have
// multiple uses), and promoting it would increase the constant pool size (Sz
// > 4), ensure we have space to do so up to MaxTotal.
if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
ConstpoolPromotionMaxTotal)
return SDValue();
// This is only valid if all users are in a single function, OR it has users
// in multiple functions but is no larger than a pointer. We also check if
// GVar has constant (non-ConstantExpr) users. If so, it essentially has its
// address taken.
if (!allUsersAreInFunction(GVar, F) &&
!(Size <= 4 && allUsersAreInFunctions(GVar)))
return SDValue();
// We're going to inline this global. Pad it out if needed.
if (RequiredPadding != 4) {
StringRef S = CDAInit->getAsString();
SmallVector<uint8_t,16> V(S.size());
std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
while (RequiredPadding--)
V.push_back(0);
Init = ConstantDataArray::get(*DAG.getContext(), V);
}
auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
SDValue CPAddr =
DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
AFI->markGlobalAsPromotedToConstantPool(GVar);
AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
PaddedSize - 4);
}
++NumConstpoolPromoted;
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
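// Worked example (assumed sizes): a 6-byte internal constant string with
// alignment <= 4 gets RequiredPadding == 2, is padded to 8 bytes with
// trailing zeros, and is emitted inline into the constant pool, so its
// users need no indirection through a global address.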
static bool isReadOnly(const GlobalValue *GV) {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->getBaseObject();
return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
isa<Function>(GV);
}
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
switch (Subtarget->getTargetTriple().getObjectFormat()) {
default: llvm_unreachable("unknown object format");
case Triple::COFF:
return LowerGlobalAddressWindows(Op, DAG);
case Triple::ELF:
return LowerGlobalAddressELF(Op, DAG);
case Triple::MachO:
return LowerGlobalAddressDarwin(Op, DAG);
}
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
bool IsRO = isReadOnly(GV);
// Try promoteToConstantPool only when not generating an execute-only (XO)
// text section.
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
return V;
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
/*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Result.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
if (UseGOT_PREL)
Result =
DAG.getLoad(PtrVT, dl, Chain, Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
} else if (Subtarget->isROPI() && IsRO) {
// PC-relative.
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
return Result;
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
SDValue RelAddr;
if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
} else { // use literal pool for address constant
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
RelAddr = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
return Result;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
}
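// With movw/movt available, the non-PIC path above typically selects to
// (a sketch):
//   movw r0, :lower16:sym
//   movt r0, :upper16:sym
// whereas the literal-pool fallback loads the address with a pc-relative ldr.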
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Darwin");
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into multiple nodes
unsigned Wrapper =
isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->isGVIndirectSymbol(GV))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
"Windows on ARM expects to use movw/movt");
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
(GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Val = DAG.getConstant(0, dl, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
Op.getOperand(0));
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue CPAddr;
bool IsPositionIndependent = isPositionIndependent();
unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
if (IsPositionIndependent) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
}
case Intrinsic::arm_neon_vabs:
return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vminnm:
case Intrinsic::arm_neon_vmaxnm: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
? ISD::FMINNUM : ISD::FMAXNUM;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vminu:
case Intrinsic::arm_neon_vmaxu: {
if (Op.getValueType().isFloatingPoint())
return SDValue();
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
? ISD::UMIN : ISD::UMAX;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vmins:
case Intrinsic::arm_neon_vmaxs: {
// v{min,max}s is overloaded between signed integers and floats.
if (!Op.getValueType().isFloatingPoint()) {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
? ISD::SMIN : ISD::SMAX;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
? ISD::FMINNAN : ISD::FMAXNAN;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vtbl1:
return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::arm_neon_vtbl2:
return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc dl(Op);
ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
if (SSID == SyncScope::SingleThread)
return Op;
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 CPUs can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, dl, MVT::i32));
}
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
ARM_MB::MemBOpt Domain = ARM_MB::ISH;
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
} else if (Subtarget->preferISHSTBarriers() &&
Ord == AtomicOrdering::Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
Domain = ARM_MB::ISHST;
}
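// The selected domain becomes the barrier-option operand of the DMB that
// the intrinsic below expands to, e.g. 'dmb ish', 'dmb ishst' or 'dmb sy'.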
return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
DAG.getConstant(Domain, dl, MVT::i32));
}
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// ARM pre-v5TE and Thumb1 do not have preload instructions.
if (!(Subtarget->isThumb2() ||
(!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
// Just preserve the chain.
return Op.getOperand(0);
SDLoc dl(Op);
unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
isData = ~isData & 1;
}
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
DAG.getConstant(isData, dl, MVT::i32));
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
CCValAssign &NextVA,
SDValue &Root,
SelectionDAG &DAG,
const SDLoc &dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
else
RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(
MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
if (!Subtarget->isLittle())
std::swap (ArgValue, ArgValue2);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
// Return: The frame index the registers were stored into.
int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
int ArgOffset, unsigned ArgSize) const {
// Currently, two use-cases are possible:
// Case #1. Non-var-args function, and we meet the first byval parameter.
// Set up the first unallocated register as the first byval register;
// eat all remaining registers
// (these two actions are performed by the HandleByVal method).
// Then, here, we initialize the stack frame with
// "store-reg" instructions.
// Case #2. Var-args function that doesn't contain byval parameters.
// The same: eat all remaining unallocated registers,
// initialize the stack frame.
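// For example (illustrative): if a byval parameter was assigned r2 and r3
// (RBegin = R2, REnd = R4), ArgOffset below becomes -4 * (R4 - R2) = -8,
// and both registers are stored to the two words immediately below the
// fixed stack arguments.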
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
} else {
unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
REnd = ARM::R4;
}
if (REnd != RBegin)
ArgOffset = -4 * (ARM::R4 - RBegin);
auto PtrVT = getPointerTy(DAG.getDataLayout());
int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
const TargetRegisterClass *RC =
AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
unsigned VReg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(OrigArg, 4 * i));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return FrameIndex;
}
// Set up the stack frame that the va_list pointer will start from.
void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain,
unsigned ArgOffset,
unsigned TotalArgRegsSaveSize,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Try to store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
// If there are no regs to be stored, just point the address past the last
// argument passed via the stack.
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
CCInfo.getInRegsParamsCount(),
CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
SDValue ARMTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
// Initially ArgRegsSaveSize is zero.
// Then we increase this value each time we meet a byval parameter.
// We also increase this value in the case of a varargs function.
AFI->setArgRegsSaveSize(0);
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
break;
CCValAssign &VA = ArgLocs[i];
unsigned Index = VA.getValNo();
ISD::ArgFlagsTy Flags = Ins[Index].Flags;
if (!Flags.isByVal())
continue;
assert(VA.isMemLoc() && "unexpected byval pointer in reg");
unsigned RBegin, REnd;
CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
ArgRegBegin = std::min(ArgRegBegin, RBegin);
CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
int lastInsIndex = -1;
if (isVarArg && MFI.hasVAStart()) {
unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
if (RegIdx != array_lengthof(GPRArgRegs))
ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
auto PtrVT = getPointerTy(DAG.getDataLayout());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (Ins[VA.getValNo()].isOrigArg()) {
std::advance(CurOrigArg,
Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
}
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
if (VA.needsCustom()) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
if (VA.getLocVT() == MVT::v2f64) {
SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI));
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
}
ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue1,
DAG.getIntPtrConstant(0, dl));
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue2,
DAG.getIntPtrConstant(1, dl));
} else
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
RC = &ARM::DPRRegClass;
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::SExt:
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::ZExt:
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
break;
}
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
// sanity check
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
if (index != lastInsIndex)
{
ISD::ArgFlagsTy Flags = Ins[index].Flags;
// FIXME: For now, all byval parameter objects are marked mutable.
// This can be changed with more analysis.
// In the case of tail call optimization, mark all arguments mutable,
// since they could be overwritten by the lowering of arguments in case
// of a tail call.
if (Flags.isByVal()) {
assert(Ins[index].isOrigArg() &&
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
int FrameIndex = StoreByValRegs(
CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
VA.getLocMemOffset(), Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI)));
}
lastInsIndex = index;
}
}
}
// varargs
if (isVarArg && MFI.hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
return Chain;
}
/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isPosZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
SDValue WrapperOp = Op.getOperand(1).getOperand(0);
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
} else if (Op->getOpcode() == ISD::BITCAST &&
Op->getValueType(0) == MVT::f64) {
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
isNullConstant(BitcastOp->getOperand(0)))
return true;
}
return false;
}
/// Returns the appropriate ARM CMP (cmp) and the corresponding condition
/// code for the given operands.
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG,
const SDLoc &dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
// Constant does not fit, try adjusting it by one?
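// For example, with CC == SETLT, 'x < 0x101' cannot encode 0x101 as an
// ARM modified immediate, but the equivalent 'x <= 0x100' can encode
// 0x100, so SETLT is rewritten as SETLE against C - 1 below.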
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
}
}
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMISD::NodeType CompareType;
switch (CondCode) {
default:
CompareType = ARMISD::CMP;
break;
case ARMCC::EQ:
case ARMCC::NE:
// Uses only Z Flag
CompareType = ARMISD::CMPZ;
break;
}
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
unsigned Opc = Cmp.getOpcode();
SDLoc DL(Cmp);
if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
Cmp.getOperand(1), Cmp.getOperand(2));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
Cmp.getOperand(1));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
SDValue &ARMcc) const {
assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
SDValue Value, OverflowCmp;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDLoc dl(Op);
// FIXME: We are currently always generating CMPs because we don't support
// generating CMN through the backend. This is not as good as the natural
// CMP case because it causes a register dependency and cannot be folded
// later.
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::SSUBO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
case ISD::USUBO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
} // switch (...)
return std::make_pair(Value, OverflowCmp);
}
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDLoc dl(Op);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
ARMcc, CCR, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
SDLoc dl(Op);
unsigned Opc = Cond.getOpcode();
if (Cond.getResNo() == 1 &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
Opc == ISD::USUBO)) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
return SDValue();
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
OverflowCmp, DAG);
}
// Convert:
//
// (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
// (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
//
if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
const ConstantSDNode *CMOVTrue =
dyn_cast<ConstantSDNode>(Cond.getOperand(0));
const ConstantSDNode *CMOVFalse =
dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (CMOVTrue && CMOVFalse) {
unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
SDValue True;
SDValue False;
if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
True = SelectTrue;
False = SelectFalse;
} else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
True = SelectFalse;
False = SelectTrue;
}
if (True.getNode() && False.getNode()) {
EVT VT = Op.getValueType();
SDValue ARMcc = Cond.getOperand(2);
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
// ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
// undefined bits before doing a full-word comparison with zero.
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
DAG.getConstant(1, dl, Cond.getValueType()));
return DAG.getSelectCC(dl, Cond,
DAG.getConstant(0, dl, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
}
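// Illustrative trace of the constraint logic below (for ISD::SETULT):
// start with GT (SETULT is 'false for equality'), set swpCmpOps because
// the condition contains 'less', then, because SETULT is unordered,
// toggle swpCmpOps back off, set swpVselOps, and flip GT to GE.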
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
// 'equality'
if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
CC == ISD::SETULE)
CondCode = ARMCC::GE;
// and GT for opcodes that return false for 'equality'.
else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
CC == ISD::SETULT)
CondCode = ARMCC::GT;
// Since we are constrained to GE/GT, if the opcode contains 'less', we need
// to swap the compare operands.
if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
CC == ISD::SETULT)
swpCmpOps = true;
// Both GT and GE are ordered comparisons, and return false for 'unordered'.
// If we have an unordered opcode, we need to swap the operands to the VSEL
// instruction (effectively negating the condition).
//
// This also has the effect of swapping which one of 'less' or 'greater'
// returns true, so we also swap the compare operands. It also switches
// whether we return true for 'equality', so we compensate by picking the
// opposite condition code to our original choice.
if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
CC == ISD::SETUGT) {
swpCmpOps = !swpCmpOps;
swpVselOps = !swpVselOps;
CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
}
// 'ordered' is 'anything but unordered', so use the VS condition code and
// swap the VSEL operands.
if (CC == ISD::SETO) {
CondCode = ARMCC::VS;
swpVselOps = true;
}
// 'unordered or not equal' is 'anything but equal', so use the EQ condition
// code and swap the VSEL operands.
if (CC == ISD::SETUNE) {
CondCode = ARMCC::EQ;
swpVselOps = true;
}
}
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
SDValue TrueLow = TrueVal.getValue(0);
SDValue TrueHigh = TrueVal.getValue(1);
SDValue FalseLow = FalseVal.getValue(0);
SDValue FalseHigh = FalseVal.getValue(1);
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
ARMcc, CCR, Cmp);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
ARMcc, CCR, duplicateCmp(Cmp, DAG));
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
} else {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
Cmp);
}
}
static bool isGTorGE(ISD::CondCode CC) {
return CC == ISD::SETGT || CC == ISD::SETGE;
}
static bool isLTorLE(ISD::CondCode CC) {
return CC == ISD::SETLT || CC == ISD::SETLE;
}
// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
// All of these conditions (and their <= and >= counterparts) will do:
// x < k ? k : x
// x > k ? x : k
// k < x ? x : k
// k > x ? k : x
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
const SDValue TrueVal, const SDValue FalseVal,
const ISD::CondCode CC, const SDValue K) {
return (isGTorGE(CC) &&
((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
(isLTorLE(CC) &&
((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
}
// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
const SDValue TrueVal, const SDValue FalseVal,
const ISD::CondCode CC, const SDValue K) {
return (isGTorGE(CC) &&
((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
(isLTorLE(CC) &&
((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
}
// Check if two chained conditionals could be converted into SSAT.
//
// SSAT can replace a set of two conditional selectors that bound a number to an
// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
//
// x < -k ? -k : (x > k ? k : x)
// x < -k ? -k : (x < k ? x : k)
// x > -k ? (x > k ? k : x) : -k
// x < k ? (x < -k ? -k : x) : k
// etc.
//
// It returns true if the conversion can be done, false otherwise.
// Additionally, the variable is returned in parameter V and the constant in K.
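// For example, with k = 127 (so k + 1 = 128 = 2^7), the chained selects
// clamp x to the interval [-128, 127], i.e. an 8-bit signed saturation.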
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
uint64_t &K) {
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
SDValue FalseVal1 = Op.getOperand(3);
ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
if (Op2.getOpcode() != ISD::SELECT_CC)
return false;
SDValue LHS2 = Op2.getOperand(0);
SDValue RHS2 = Op2.getOperand(1);
SDValue TrueVal2 = Op2.getOperand(2);
SDValue FalseVal2 = Op2.getOperand(3);
ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
// Find out which are the constants and which are the variables
// in each conditional
SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
? &RHS1
: nullptr;
SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
? &RHS2
: nullptr;
SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
// We must detect cases where the original operations worked with 16- or
// 8-bit values. In such cases, V2Tmp != V2 because the comparison
// operations must work with sign-extended values but the select operations
// return the original non-extended value.
SDValue V2TmpReg = V2Tmp;
if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
V2TmpReg = V2Tmp->getOperand(0);
// Check that the registers and the constants have the correct values
// in both conditionals
if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
V2TmpReg != V2)
return false;
// Figure out which conditional is saturating the lower/upper bound.
const SDValue *LowerCheckOp =
isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
: isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
? &Op2
: nullptr;
const SDValue *UpperCheckOp =
isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
: isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
? &Op2
: nullptr;
if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
return false;
// Check that the constant in the lower-bound check is the one's
// complement of the constant in the upper-bound check.
int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
int64_t PosVal = std::max(Val1, Val2);
if (((Val1 > Val2 && UpperCheckOp == &Op) ||
(Val1 < Val2 && UpperCheckOp == &Op2)) &&
Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
V = V2;
K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
return true;
}
return false;
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;
uint64_t SatConstant;
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
isSaturatingConditional(Op, SatValue, SatConstant))
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
// codes: it only has two bits to select the condition code, so it's
// constrained to use only GE, GT, VS and EQ.
//
// To implement all the various ISD::SETXXX opcodes, we sometimes need to
// swap the operands of the previous compare instruction (effectively
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
CC = ISD::getSetCCInverse(CC, true);
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
bool InvalidOnQNaN;
FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
if (swpCmpOps)
std::swap(LHS, RHS);
if (swpVselOps)
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
const ARMSubtarget *Subtarget) {
SDNode *N = Op.getNode();
if (!N->hasOneUse())
// Otherwise it requires moving the value from fp to integer registers.
return false;
if (!N->getNumValues())
return false;
EVT VT = Op.getValueType();
if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
// f32 case is generally profitable. f64 case only makes sense when vcmpe +
// vmrs are very slow, e.g. cortex-a8.
return false;
if (isFloatingPointZero(Op)) {
SeenZero = true;
return true;
}
return ISD::isNormalLoad(N);
}
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (isFloatingPointZero(Op))
return DAG.getConstant(0, SDLoc(Op), MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
}
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue &RetVal1, SDValue &RetVal2) {
SDLoc dl(Op);
if (isFloatingPointZero(Op)) {
RetVal1 = DAG.getConstant(0, dl, MVT::i32);
RetVal2 = DAG.getConstant(0, dl, MVT::i32);
return;
}
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
RetVal1 =
DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Ld->getAlignment(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4), NewAlign,
Ld->getMemOperand()->getFlags());
return;
}
llvm_unreachable("Unknown VFP cmp argument!");
}
/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
bool LHSSeenZero = false;
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
bool RHSSeenZero = false;
bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
if (CC == ISD::SETOEQ)
CC = ISD::SETEQ;
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(LHS, DAG), Mask);
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMcc, CCR, Cmp);
}
SDValue LHS1, LHS2;
SDValue RHS1, RHS2;
expandf64Toi32(LHS, DAG, LHS1, LHS2);
expandf64Toi32(RHS, DAG, RHS1, RHS2);
LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
}
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMcc, CCR, Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
return Result;
}
ARMCC::CondCodes CondCode, CondCode2;
bool InvalidOnQNaN;
FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
SDLoc dl(Op);
EVT PTy = getPointerTy(DAG.getDataLayout());
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
// Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the
// jump table, which does another jump to the destination. This also makes
// it easier to translate it to TBB / TBH later (Thumb2 only).
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
if (isPositionIndependent() || Subtarget->isROPI()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr =
DAG.getLoad(PTy, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
}
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (Op.getValueType().getVectorElementType() == MVT::i32) {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
return Op;
return DAG.UnrollVectorOp(Op.getNode());
}
assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4i16)
return DAG.UnrollVectorOp(Op.getNode());
Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
if (VT.getVectorElementType() == MVT::f32)
return Op;
return DAG.UnrollVectorOp(Op.getNode());
}
assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4f32)
return DAG.UnrollVectorOp(Op.getNode());
unsigned CastOpc;
unsigned Opc;
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
CastOpc = ISD::SIGN_EXTEND;
Opc = ISD::SINT_TO_FP;
break;
case ISD::UINT_TO_FP:
CastOpc = ISD::ZERO_EXTEND;
Opc = ISD::UINT_TO_FP;
break;
}
Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
Tmp0.getOpcode() == ARMISD::VMOVDRR;
bool UseNEON = !InGPR && Subtarget->hasNEON();
if (UseNEON) {
// Use VBSL to copy the sign bit.
unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
if (VT == MVT::f64)
Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
DAG.getConstant(32, dl, MVT::i32));
else /*if (VT == MVT::f32)*/
Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
if (SrcVT == MVT::f32) {
Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
if (VT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
} else if (VT == MVT::f32)
Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
dl, MVT::i32);
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
if (VT == MVT::f32) {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
DAG.getConstant(0, dl, MVT::i32));
} else {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
}
return Res;
}
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
Tmp1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
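// For f32 this computes
// result = (bits(Tmp1) & 0x80000000) | (bits(Tmp0) & 0x7fffffff).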
SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
if (VT == MVT::f32) {
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
Tmp0);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return LR, which contains the return address. Mark it an implicit live-in.
unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const ARMBaseRegisterInfo &ARI =
*static_cast<const ARMBaseRegisterInfo*>(RegInfo);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
.Default(0);
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
// The result is a 64-bit value, so split it into two 32-bit values and
// return them as a pair of values.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
SDLoc DL(N);
// This function is only supposed to be called for an i64 destination type.
assert(N->getValueType(0) == MVT::i64
&& "ExpandREAD_REGISTER called for non-i64 type result.");
SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
N->getOperand(0),
N->getOperand(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
Read.getValue(1)));
Results.push_back(Read.getOperand(0));
}
/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
/// When \p DstVT, the destination type of \p BC, is on the vector
/// register bank and the source of the bitcast, \p Op, operates on the same
/// bank, it might be possible to combine them, such that everything stays
/// on the vector register bank.
/// \return The node that would replace \p BC, if the combine is possible.
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
SelectionDAG &DAG) {
SDValue Op = BC->getOperand(0);
EVT DstVT = BC->getValueType(0);
// The only vector instruction that can produce a scalar (remember,
// since the bitcast was about to be turned into VMOVDRR, the source
// type is i64) from a vector is EXTRACT_VECTOR_ELT.
// Moreover, we can do this combine only if there is one use.
// Finally, if the destination type is not a vector, there is not
// much point in forcing everything onto the vector bank.
if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!Op.hasOneUse())
return SDValue();
// If the index is not constant, we will introduce an additional
// multiply that will stick.
// Give up in that case.
ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Index)
return SDValue();
unsigned DstNumElt = DstVT.getVectorNumElements();
// Compute the new index.
const APInt &APIntIndex = Index->getAPIntValue();
APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
NewIndex *= APIntIndex;
// Check if the new constant index fits into i32.
if (NewIndex.getBitWidth() > 32)
return SDValue();
// vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
// vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
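// For example (illustrative): v2f32 (bitcast (i64 extractelt v2i64 src, 1))
// becomes v2f32 (extract_subvector (v4f32 bitcast src), i32 2).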
SDLoc dl(Op);
SDValue ExtractSrc = Op.getOperand(0);
EVT VecVT = EVT::getVectorVT(
*DAG.getContext(), DstVT.getScalarType(),
ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
}
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
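/// For example, (i64 (bitcast f64 X)) becomes
/// (i64 (build_pair (VMOVRRD X))), and (f64 (bitcast i64 Y)) becomes
/// (bitcast (VMOVDRR (extract_element Y, 0), (extract_element Y, 1))).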
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
SDValue Op = N->getOperand(0);
// This function is only supposed to be called for i64 types, either as the
// source or destination of the bit convert.
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
"ExpandBITCAST called for non-i64 type");
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
// Do not force values to GPRs (this is what VMOVDRR does for the inputs)
// if we can combine the bitcast with its source.
if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
return Val;
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, DstVT,
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
SDValue Cvt;
if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
SrcVT.getVectorNumElements() > 1)
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32),
DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
else
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Op);
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
return SDValue();
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction. However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
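/// For example, for an i64 right shift split into 32-bit parts: when
/// ShAmt < 32, Lo = (Lo >> ShAmt) | (Hi << (32 - ShAmt)) and Hi = Hi >> ShAmt;
/// when ShAmt >= 32, Lo = Hi >> (ShAmt - 32) and Hi becomes the sign bits
/// (SRA) or zero (SRL), with the two cases selected via CMOV on
/// ShAmt - 32 >= 0.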
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
ARMcc, CCR, CmpLo);
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, VT))
: DAG.getConstant(0, dl, VT);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
ARMcc, CCR, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
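/// For example, for an i64 left shift split into 32-bit parts: when
/// ShAmt < 32, Hi = (Hi << ShAmt) | (Lo >> (32 - ShAmt)) and Lo = Lo << ShAmt;
/// when ShAmt >= 32, Hi = Lo << (ShAmt - 32) and Lo = 0.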
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
ARMcc, CCR, CmpHi);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPSCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
// The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
// so that the shift and the AND get folded into a bitfield extract.
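// For example, if FPSCR[23:22] = 0b11 (round toward zero), adding 1 << 22
// increments the two-bit field to 0b00 (the carry into bit 24 is masked off
// by the final AND), giving FLT_ROUNDS = 0, i.e. toward zero.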
SDLoc dl(Op);
SDValue Ops[] = { DAG.getEntryNode(),
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
}
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
assert(ST->hasNEON());
// Compute the least significant set bit: LSB = X & -X
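// e.g. for x = 0b0110: -x = ...11111010, so x & -x = 0b0010, and
// lsb - 1 = 0b0001 has exactly cttz(x) = 1 bits set.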
SDValue X = N->getOperand(0);
SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
EVT ElemTy = VT.getVectorElementType();
if (ElemTy == MVT::i8) {
// Compute with: cttz(x) = ctpop(lsb - 1)
SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(1, dl, ElemTy));
SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
}
if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
(N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
// Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
unsigned NumBits = ElemTy.getSizeInBits();
SDValue WidthMinus1 =
DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
}
// Compute with: cttz(x) = ctpop(lsb - 1)
// Since we can only compute the number of bits in a byte with vcnt.8, we
// have to gather the result with pairwise addition (vpaddl) for i16, i32,
// and i64.
// Compute LSB - 1.
SDValue Bits;
if (ElemTy == MVT::i64) {
// Load constant 0xffff'ffff'ffff'ffff to register.
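// 0x1eff is the encoded NEON modified immediate with Op=1, Cmode=1110 and
// Imm=0xff, i.e. every byte set (see the 64-bit case of isNEONModifiedImm
// below); adding all-ones is equivalent to subtracting one.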
SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(0x1eff, dl, MVT::i32));
Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
} else {
SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(1, dl, ElemTy));
Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
}
// Count #bits with vcnt.8.
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
// Gather the #bits with vpaddl (pairwise add.)
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt8);
if (ElemTy == MVT::i16)
return Cnt16;
EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt16);
if (ElemTy == MVT::i32)
return Cnt32;
assert(ElemTy == MVT::i64);
SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt32);
return Cnt64;
}
if (!ST->hasV6T2Ops())
return SDValue();
SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
/// for each 16-bit element from operand, repeated. The basic idea is to
/// leverage vcnt to get the 8-bit counts, gather and add the results.
///
/// Trace for v4i16:
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
/// [b0 b1 b2 b3 b4 b5 b6 b7]
/// +[b1 b0 b3 b2 b5 b4 b7 b6]
/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
}
/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
/// bit-count for each 16-bit element from the operand. We need slightly
/// different sequencing for v4i16 and v8i16 to stay within NEON's available
/// 64/128-bit registers.
///
/// Trace for v4i16:
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
/// v4i16:Extracted = [k0 k1 k2 k3 ]
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
BitCounts, DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
}
}
/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
/// bit-count for each 32-bit element from the operand. The idea here is
/// to split the vector into 16-bit elements, leverage the 16-bit count
/// routine, and then combine the results.
///
/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
/// input = [v0 v1 ] (vi: 32-bit elements)
/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
/// vrev: N0 = [k1 k0 k3 k2 ]
/// [k0 k1 k2 k3 ]
/// N1 =+[k1 k0 k3 k2 ]
/// [k0 k2 k1 k3 ]
/// N2 =+[k1 k3 k0 k2 ]
/// [k0 k2 k1 k3 ]
/// Extended =+[k1 k3 k0 k2 ]
/// [k0 k2 ]
/// Extracted=+[k1 k3 ]
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
}
}
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
if (VT.getVectorElementType() == MVT::i32)
return lowerCTPOP32BitElements(N, DAG);
else
return lowerCTPOP16BitElements(N, DAG);
}
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (!VT.isVector())
return SDValue();
// Lower vector shifts on NEON to use VSHL.
assert(ST->hasNEON() && "unexpected vector shift");
// Left shifts translate directly to the vshiftu intrinsic.
if (N->getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
MVT::i32),
N->getOperand(0), N->getOperand(1));
assert((N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
// NEON uses the same intrinsics for both left and right shifts. For
// right shifts, the shift amounts are negative, so negate the vector of
// shift amounts.
EVT ShiftVT = N->getOperand(1).getValueType();
SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
getZeroVector(ShiftVT, DAG, dl),
N->getOperand(1));
Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
Intrinsic::arm_neon_vshifts :
Intrinsic::arm_neon_vshiftu);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(vshiftInt, dl, MVT::i32),
N->getOperand(0), NegatedCount);
}
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
return SDValue();
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
if (ST->isThumb1Only()) return SDValue();
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
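// e.g. for an i64 logical shift right by one: the high word is shifted right
// with the bit shifted out captured in the carry flag, and RRX then rotates
// that carry into bit 31 of the low word: Lo' = (carry << 31) | (Lo >> 1).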
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
DAG.getConstant(1, dl, MVT::i32));
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue TmpOp0, TmpOp1;
bool Invert = false;
bool Swap = false;
unsigned Opc = 0;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
(SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
// Special-case integer 64-bit equality comparisons. They aren't legal,
// but they can be lowered with a few vector instructions.
unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
DAG.getCondCode(ISD::SETEQ));
SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
if (SetCCOpcode == ISD::SETNE)
Merged = DAG.getNOT(dl, Merged, CmpVT);
Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
return Merged;
}
if (CmpVT.getVectorElementType() == MVT::i64)
// 64-bit comparisons are not legal in general.
return SDValue();
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETOLT:
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETOLE:
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE: Opc = ARMISD::VCGE; break;
case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETONE:
// Expand this to (OLT | OGT).
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
// Expand this to (OLT | OGE).
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
break;
}
} else {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
if (Opc == ARMISD::VCEQ) {
SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
AndOp = Op1;
// Ignore bitconvert.
if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
AndOp = AndOp.getOperand(0);
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert;
}
}
}
if (Swap)
std::swap(Op0, Op1);
// If one of the operands is a constant vector zero, attempt to fold the
// comparison to a specialized compare-against-zero form.
SDValue SingleOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
SingleOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
if (Opc == ARMISD::VCGE)
Opc = ARMISD::VCLEZ;
else if (Opc == ARMISD::VCGT)
Opc = ARMISD::VCLTZ;
SingleOp = Op1;
}
SDValue Result;
if (SingleOp.getNode()) {
switch (Opc) {
case ARMISD::VCEQ:
Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else {
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
Result = DAG.getSExtOrTrunc(Result, dl, VT);
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
}
static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
assert(Carry.getOpcode() != ISD::CARRY_FALSE);
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
SDValue ARMcc = DAG.getConstant(
IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
Cmp.getValue(1), SDValue());
return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
CCR, Chain.getValue(1));
}
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
const SDLoc &dl, EVT &VT, bool is128Bits,
NEONModImmType type) {
unsigned OpCmode, Imm;
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
// immediate instructions other than VMOV do not support the 8-bit encoding
// of a zero vector, and the default encoding of zero is supposed to be the
// 32-bit version.
if (SplatBits == 0)
SplatBitSize = 32;
switch (SplatBitSize) {
case 8:
if (type != VMOVModImm)
return SDValue();
// Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
OpCmode = 0xe;
Imm = SplatBits;
VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn: Op=x, Cmode=100x.
OpCmode = 0x8;
Imm = SplatBits;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0xnn00: Op=x, Cmode=101x.
OpCmode = 0xa;
Imm = SplatBits >> 8;
break;
}
return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn: Op=x, Cmode=000x.
OpCmode = 0;
Imm = SplatBits;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0x0000nn00: Op=x, Cmode=001x.
OpCmode = 0x2;
Imm = SplatBits >> 8;
break;
}
if ((SplatBits & ~0xff0000) == 0) {
// Value = 0x00nn0000: Op=x, Cmode=010x.
OpCmode = 0x4;
Imm = SplatBits >> 16;
break;
}
if ((SplatBits & ~0xff000000) == 0) {
// Value = 0xnn000000: Op=x, Cmode=011x.
OpCmode = 0x6;
Imm = SplatBits >> 24;
break;
}
// cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
if (type == OtherModImm) return SDValue();
if ((SplatBits & ~0xffff) == 0 &&
((SplatBits | SplatUndef) & 0xff) == 0xff) {
// Value = 0x0000nnff: Op=x, Cmode=1100.
OpCmode = 0xc;
Imm = SplatBits >> 8;
break;
}
if ((SplatBits & ~0xffffff) == 0 &&
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
// Value = 0x00nnffff: Op=x, Cmode=1101.
OpCmode = 0xd;
Imm = SplatBits >> 16;
break;
}
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
return SDValue();
case 64: {
if (type != VMOVModImm)
return SDValue();
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
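// e.g. the splat 0x00ff00ff00ff00ff has bytes ff,00,ff,00,ff,00,ff,00
// starting from the least significant byte, giving Imm = 0b01010101 = 0x55.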
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
Imm |= ImmMask;
} else if ((SplatBits & BitMask) != 0) {
return SDValue();
}
BitMask <<= 8;
ImmMask <<= 1;
}
if (DAG.getDataLayout().isBigEndian())
// Swap the higher and lower 32-bit words.
Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
default:
llvm_unreachable("unexpected size for isNEONModifiedImm");
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
}
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
const APFloat &FPVal = CFP->getValueAPF();
// Prevent floating-point constants from using literal loads
// when execute-only is enabled.
if (ST->genExecuteOnly()) {
APInt INTVal = FPVal.bitcastToAPInt();
SDLoc DL(CFP);
if (IsDouble) {
SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
if (!ST->isLittle())
std::swap(Lo, Hi);
return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
} else {
return DAG.getConstant(INTVal, DL, MVT::i32);
}
}
if (!ST->hasVFP3())
return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU.
if (IsDouble && Subtarget->isFPOnlySP())
return SDValue();
// Try splatting with a VMOV.f32...
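// (getFP32Imm/getFP64Imm return the 8-bit VFP immediate encoding, or -1 if
// the value is not representable; roughly, values of the form
// +/-(16..31)/16 * 2^n with n in [-3, 4], e.g. 0.5, 1.0 or 31.0.)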
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
// We have code in place to select a valid ConstantFP already, no need to
// do any mangling.
return Op;
}
// It's a float and we are trying to use NEON operations where
// possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
DAG.getConstant(0, DL, MVT::i32));
}
// The rest of our options are NEON only; make sure that's allowed before
// proceeding.
if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
return SDValue();
EVT VMovVT;
uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
// It wouldn't really be worth bothering for doubles except for one very
// important value, which does happen to match: 0.0. So make sure we don't do
// anything stupid.
if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
return SDValue();
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
VMovVT, false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
if (IsDouble)
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
// It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
DAG.getConstant(0, DL, MVT::i32));
}
// Finally, try a VMVN.i32
NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
if (IsDouble)
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
// It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
DAG.getConstant(0, DL, MVT::i32));
}
return SDValue();
}
// Check if a VEXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
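// e.g. for v8i8 the mask <2, 3, 4, 5, 6, 7, 0, 1> is accepted with Imm = 2:
// the indices step by one and wrap back around into the same source.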
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
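// e.g. for two v4i32 sources the mask <6, 7, 0, 1> wraps past NumElts * 2,
// so ReverseVEXT is set and Imm is adjusted from 6 to 6 - 4 = 2.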
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, it may still be
// a VEXT but the source vectors must be swapped.
ExpectedElt += 1;
if (ExpectedElt == NumElts * 2) {
ExpectedElt = 0;
ReverseVEXT = true;
}
if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
// Adjust the index value if the source operands will be swapped.
if (ReverseVEXT)
Imm -= NumElts;
return true;
}
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
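/// e.g. for v8i16 with BlockSize == 32, the mask <1, 0, 3, 2, 5, 4, 7, 6>
/// swaps adjacent 16-bit elements and therefore matches VREV32.16.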
static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
return true;
}
static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask
// of 8 elements can work here.
return VT == MVT::v8i8 && M.size() == 8;
}
// Checks whether the shuffle mask represents a vector transpose (VTRN) by
// checking that pairs of elements in the shuffle mask represent the same index
// in each vector, incrementing the expected index by 2 at each step.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
// v2={e,f,g,h}
// WhichResult gives the offset for each element in the mask based on which
// of the two results it belongs to.
//
// The transpose can be represented either as:
// result1 = shufflevector v1, v2, result1_shuffle_mask
// result2 = shufflevector v1, v2, result2_shuffle_mask
// where v1/v2 and the shuffle masks have the same number of elements
// (here WhichResult (see below) indicates which result is being checked)
//
// or as:
// results = shufflevector v1, v2, shuffle_mask
// where both results are returned in one vector and the shuffle mask has twice
// as many elements as v1/v2 (here WhichResult will always be 0 if true); in
// this case we check the low half and the high half of the shuffle mask as if
// each were a mask of the first form.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
// If the mask is twice as long as the input vector then we need to check the
// upper and lower parts of the mask with a matching value for WhichResult
// FIXME: A mask with only even values will be rejected in case the first
// element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
// M[0] is used to determine WhichResult
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
return true;
}
/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
return true;
}
// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
// that the mask elements are either all even and in steps of size 2 or all odd
// and in steps of size 2.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
// v2={e,f,g,h}
// Requires checks similar to those of isVTRNMask with
// respect to how the results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
int MIdx = M[i + j + k];
if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
// that pairs of elements of the shufflemask represent the same index in each
// vector incrementing sequentially through the vectors.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
// v2={e,f,g,h}
// Requires checks similar to those of isVTRNMask with respect to how the
// results are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
return false;
Idx += 1;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
return false;
Idx += 1;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
unsigned &WhichResult,
bool &isV_UNDEF) {
isV_UNDEF = false;
if (isVTRNMask(ShuffleMask, VT, WhichResult))
return ARMISD::VTRN;
if (isVUZPMask(ShuffleMask, VT, WhichResult))
return ARMISD::VUZP;
if (isVZIPMask(ShuffleMask, VT, WhichResult))
return ARMISD::VZIP;
isV_UNDEF = true;
if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VTRN;
if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VUZP;
if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VZIP;
return 0;
}
/// \return true if this is a reverse operation on a vector.
static bool isReverseMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
// Make sure the mask has the right size.
if (NumElts != M.size())
return false;
// Look for <15, ..., 3, -1, 1, 0>.
for (unsigned i = 0; i != NumElts; ++i)
if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
return false;
return true;
}
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
const ARMSubtarget *ST, const SDLoc &dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
Val = cast<ConstantSDNode>(N)->getZExtValue();
if (ST->isThumb1Only()) {
if (Val <= 255 || ~Val <= 255)
return DAG.getConstant(Val, dl, MVT::i32);
} else {
if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
return DAG.getConstant(Val, dl, MVT::i32);
}
return SDValue();
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc dl(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatUndef.isAllOnesValue())
return DAG.getUNDEF(VT);
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
}
}
}
}
// Scan through the operands to see if only one value is used.
//
// As an optimisation, even if more than one value is used it may be more
// profitable to splat with one value and then change some lanes.
//
// Heuristically we decide to do this if the vector has a "dominant" value,
// defined as splatted to more than half of the lanes.
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool hasDominantValue = false;
bool isConstant = true;
// Map of the number of times a particular SDValue appears in the
// element list.
DenseMap<SDValue, unsigned> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
if (i > 0)
isOnlyLowElement = false;
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
Value = V;
}
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
if (!Value.getNode() && !ValueCounts.empty())
Value = ValueCounts.begin()->first;
if (ValueCounts.empty())
return DAG.getUNDEF(VT);
// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
// Keep going if we are hitting this case.
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
unsigned EltSize = VT.getScalarSizeInBits();
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (hasDominantValue && EltSize <= 32) {
if (!isConstant) {
SDValue N;
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
(constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, dl, MVT::i32)),
DAG.getConstant(index, dl, MVT::i32));
} else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
} else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
if (!usesOnlyOneValue) {
// The dominant value was splatted as 'N', but we now have to insert
// all differing elements.
for (unsigned I = 0; I < NumElts; ++I) {
if (Op.getOperand(I) == Value)
continue;
SmallVector<SDValue, 3> Ops;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
return N;
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if (usesOnlyOneValue) {
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
if (isConstant && Val.getNode())
return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
}
// If all elements are constants and the case above didn't get hit, fall back
// to the default expansion, which will generate a load from the constant
// pool.
if (isConstant)
return SDValue();
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
SDValue shuffle = ReconstructShuffle(Op, DAG);
if (shuffle != SDValue())
return shuffle;
}
if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
// If we haven't found an efficient lowering, try splitting a 128-bit vector
// into two 64-bit vectors; we might discover a better way to lower it.
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
EVT ExtVT = VT.getVectorElementType();
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
SDValue Lower =
DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
if (Lower.getOpcode() == ISD::BUILD_VECTOR)
Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
SDValue Upper = DAG.getBuildVector(
HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
if (Upper.getOpcode() == ISD::BUILD_VECTOR)
Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
if (Lower && Upper)
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
}
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
for (unsigned i = 0 ; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
return SDValue();
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt = std::numeric_limits<unsigned>::max();
unsigned MaxElt = 0;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
int WindowBase = 0;
int WindowScale = 1;
ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
} else if (!isa<ConstantSDNode>(V.getOperand(1))) {
// Furthermore, shuffles require a constant mask, whereas extractelts
// accept variable indices.
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = llvm::find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
// are involved.
if (Sources.size() > 2)
return SDValue();
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy))
SmallestEltTy = SrcEltTy;
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
if (SrcVT.getSizeInBits() == VT.getSizeInBits())
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
return SDValue();
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i32));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i32));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i32));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i32));
Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Src.MinElt, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final sanity check before we try to actually produce a shuffle.
DEBUG(
for (auto Src : Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT);
);
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = llvm::find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT))
return SDValue();
// We can't handle more than two sources. This should have already
// been checked before this point.
assert(Sources.size() <= 2 && "Too many sources!");
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (M[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = M[i];
}
// Compute the index in the perfect shuffle table.
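// Each of the four indices is a base-9 digit (0-7 for a lane, 8 for undef),
// so together they form a single four-digit base-9 table index.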
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return true;
}
bool ReverseVEXT, isV_UNDEF;
unsigned Imm, WhichResult;
unsigned EltSize = VT.getScalarSizeInBits();
return (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
isVTBLMask(M, VT) ||
isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
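// PFEntry layout: bits [31:30] hold the cost, [29:26] the opcode (see the
// enum below), [25:13] the LHS sub-shuffle id and [12:0] the RHS one.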
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR // VTRN, right result
};
if (OpNum == OP_COPY) {
if (LHSID == (1*9+2)*9+3) return LHS;
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default: llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
// vrev <4 x i16> -> VREV32
if (VT.getVectorElementType() == MVT::i16)
return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
// vrev <4 x i8> -> VREV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3:
return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3:
return DAG.getNode(ARMISD::VEXT, dl, VT,
OpLHS, OpRHS,
DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
case OP_VUZPL:
case OP_VUZPR:
return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
case OP_VZIPL:
case OP_VZIPR:
return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
case OP_VTRNL:
case OP_VTRNR:
return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
}
}
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
"Expect an v8i16/v16i8 type");
OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
// For a v16i8 type: after the VREV64, we have <7, ..., 0, 15, ..., 8>. The
// VEXT below swaps the two double words, yielding the fully reversed vector
// <15, ..., 0>. The v8i16 case is similar.
unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
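// The VEXT immediate counts elements, so rotating by one double word is
// 8 lanes for v16i8 and 4 lanes for v8i16.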
return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
DAG.getConstant(ExtractNum, DL, MVT::i32));
}
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
// FIXME: floating-point vectors should be canonicalized to integer vectors
// of the same size so that they get CSEd properly.
ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize <= 32) {
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via a plain VDUP, if possible.
if (Lane == -1) Lane = 0;
// Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
// (and probably will turn into a SCALAR_TO_VECTOR once legalization
// reaches it).
if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(0))) {
bool IsScalarToVector = true;
for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
if (!V1.getOperand(i).isUndef()) {
IsScalarToVector = false;
break;
}
if (IsScalarToVector)
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, dl, MVT::i32));
}
bool ReverseVEXT;
unsigned Imm;
if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
if (ReverseVEXT)
std::swap(V1, V2);
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
}
if (isVREVMask(ShuffleMask, VT, 64))
return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
if (isVREVMask(ShuffleMask, VT, 32))
return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
// Check for Neon shuffles that modify both input vectors in place.
// If both results are used, i.e., if there are two shuffles with the same
// source operands and with masks corresponding to both results of one of
// these operations, DAG memoization will ensure that a single node is
// used for both shuffles.
unsigned WhichResult;
bool isV_UNDEF;
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
ShuffleMask, VT, WhichResult, isV_UNDEF)) {
if (isV_UNDEF)
V2 = V1;
return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
.getValue(WhichResult);
}
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
// shuffles that produce a result larger than their operands with:
// shuffle(concat(v1, undef), concat(v2, undef))
// ->
// shuffle(concat(v1, v2), undef)
// because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
//
// This is useful in the general case, but there are special cases where
// native shuffles produce larger results: the two-result ops.
//
// Look through the concat when lowering them:
// shuffle(concat(v1, v2), undef)
// ->
// concat(VZIP(v1, v2):0, :1)
//
if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
SDValue SubV1 = V1->getOperand(0);
SDValue SubV2 = V1->getOperand(1);
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
assert(llvm::all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
if (isV_UNDEF)
SubV2 = SubV1;
assert((WhichResult == 0) &&
"In-place shuffle of concat can only have one result!");
SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
SubV1, SubV2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
Res.getValue(1));
}
}
}
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
Ops.push_back(DAG.getUNDEF(EltVT));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
ShuffleMask[i] < (int)NumElts ? V1 : V2,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
dl, MVT::i32)));
}
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
if (VT == MVT::v8i8)
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
return SDValue();
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// INSERT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(2);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
return Op;
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
SDValue Vec = Op.getOperand(0);
if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
return Op;
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
"unexpected CONCAT_VECTORS");
SDLoc dl(Op);
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (!Op0.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
DAG.getIntPtrConstant(0, dl));
if (!Op1.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
DAG.getIntPtrConstant(1, dl));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
// A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
EVT VT = N->getValueType(0);
if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
SDNode *BVN = N->getOperand(0).getNode();
if (BVN->getValueType(0) != MVT::v4i32 ||
BVN->getOpcode() != ISD::BUILD_VECTOR)
return false;
unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
unsigned HiElt = 1 - LoElt;
ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
return false;
if (isSigned) {
if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
return true;
} else {
if (Hi0->isNullValue() && Hi1->isNullValue())
return true;
}
return false;
}
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
/// isSignExtended - Check if a node is a vector value that is sign-extended
/// or a constant BUILD_VECTOR with sign-extended elements.
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, true))
return true;
return false;
}
/// isZeroExtended - Check if a node is a vector value that is zero-extended
/// or a constant BUILD_VECTOR with zero-extended elements.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, false))
return true;
return false;
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect the ExtTy to be 128 bits total. If the OrigTy is less than
// 64 bits, we need to insert a new extension so that it will be 64 bits.
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
/// SkipLoadExtensionForVMULL - return a load of the original vector size that
/// does not do any sign/zero extension. If the original vector is less
/// than 64 bits, an appropriate extension will be added after the load to
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(),
LD->getAlignment(), LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a sext/zext node because LowerMUL is also run during normal
// operation legalization, where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
LD->getMemoryVT(), LD->getAlignment(),
LD->getMemOperand()->getFlags());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
/// extending load, or BUILD_VECTOR with extended elements, return the
/// unextended value. The unextended vector should be 64 bits so that it can
/// be used as an operand to a VMULL instruction. If the original vector size
/// before extension is less than 64 bits, we add an extension to resize
/// the vector to 64 bits.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
"Expected extending load");
SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
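// Splice the new load into the DAG: route the old load's chain users to the
// new load, and give its value users a re-extended copy so their type
// expectations still hold.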
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue extLoad =
DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
return newLoad;
}
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
SDNode *BVN = N->getOperand(0).getNode();
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
return DAG.getBuildVector(
MVT::v2i32, SDLoc(N),
{BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
SDLoc dl(N);
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
EVT VT = Op.getValueType();
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = ARMISD::VMULLs;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = ARMISD::VMULLu;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = ARMISD::VMULLs;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = ARMISD::VMULLu;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = ARMISD::VMULLu;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a VMULL instruction.
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
Op0 = SkipExtensionForVMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
// isel lowering, to take advantage of no-stall back-to-back vmul + vmla.
// vmull q0, d4, d6
// vmlal q0, d5, d6
// is faster than
// vaddl q0, d4, d5
// vmovl q1, d6
// vmul q0, q0, q1
SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
// Get reciprocal estimate.
// float4 recip = vrecpeq_f32(yf);
Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
Y);
// Because char has a smaller range than uchar, we can actually get away
// without any Newton steps. This requires that we use a weird bias
// of 0xb000, however (again, this has been exhaustively tested).
// float4 result = as_float4(as_int4(xf*recip) + 0xb000);
X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
// Convert back to short.
X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
return X;
}
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
// float4 xf = vcvt_f32_s32(vmovl_s16(x));
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
// Use reciprocal estimate and one refinement step.
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
N1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Because short has a smaller range than ushort, we can actually get away
// with only a single Newton step. This requires that we use a weird bias
// of 0x89, however (again, this has been exhaustively tested).
// float4 result = as_float4(as_int4(xf*recip) + 0x89);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
// return vmovn_s32(vcvt_s32_f32(result));
N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
return N0;
}
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
return N0;
}
return LowerSDIV_v4i16(N0, N1, dl, DAG);
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
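// Narrow v8i16 -> v8i8 with a saturating signed-to-unsigned move (VQMOVUN);
// the unsigned quotients fit in a byte, so no information is lost.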
N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
MVT::i32),
N0);
return N0;
}
// v4i16 udiv ... Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_u16(y));
// float4 xf = vcvt_f32_s32(vmovl_u16(x));
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
// Use reciprocal estimate and two refinement steps.
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
BN1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Simply multiplying by the reciprocal estimate can leave us a few ulps
// too low, so we add 2 ulps (exhaustive testing shows that this is enough,
// and that it will never cause us to return an answer too large).
// float4 result = as_float4(as_int4(xf*recip) + 2);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
N1 = DAG.getConstant(2, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
// return vmovn_u32(vcvt_s32_f32(result));
N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
return N0;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getNode()->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = ARMISD::ADDC; break;
case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
case ISD::SUBC: Opc = ARMISD::SUBC; break;
case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin());
// For iOS, we want to call an alternative entry point: __sincos_stret;
// the return values are passed via sret.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
Type *RetTy = StructType::get(ArgTy, ArgTy);
auto &DL = DAG.getDataLayout();
ArgListTy Args;
bool ShouldUseSRet = Subtarget->isAPCS_ABI();
SDValue SRet;
if (ShouldUseSRet) {
// Create stack object for sret.
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
ArgListEntry Entry;
Entry.Node = SRet;
Entry.Ty = RetTy->getPointerTo();
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsSRet = true;
Args.push_back(Entry);
RetTy = Type::getVoidTy(*DAG.getContext());
}
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
RTLIB::Libcall LC =
(ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setCallee(CC, RetTy, Callee, std::move(Args))
.setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
if (!ShouldUseSRet)
return CallResult.first;
SDValue LoadSin =
DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
// Address of cos field.
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
SDValue LoadCos =
DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
LoadSin.getValue(0), LoadCos.getValue(0));
}
SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
bool Signed,
SDValue &Chain) const {
EVT VT = Op.getValueType();
assert((VT == MVT::i32 || VT == MVT::i64) &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
const auto &DL = DAG.getDataLayout();
const auto &TLI = DAG.getTargetLoweringInfo();
const char *Name = nullptr;
if (Signed)
Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
else
Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
ARMTargetLowering::ArgListTy Args;
for (auto AI : {1, 0}) {
ArgListEntry Arg;
Arg.Node = Op.getOperand(AI);
Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
Args.push_back(Arg);
}
CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
ES, std::move(Args));
return LowerCallTo(CLI).first;
}
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
DAG.getEntryNode(), Op.getOperand(1));
return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
SDLoc DL(N);
SDValue Op = N->getOperand(1);
if (N->getValueType(0) == MVT::i32)
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
DAG.getConstant(1, DL, MVT::i32));
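// For an i64 denominator, OR the two halves: the result is zero iff the
// full 64-bit value is zero, so a single 32-bit check suffices.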
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
}
void ARMTargetLowering::ExpandDIV_Windows(
SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const {
const auto &DL = DAG.getDataLayout();
const auto &TLI = DAG.getTargetLoweringInfo();
assert(Op.getValueType() == MVT::i64 &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
Results.push_back(Lower);
Results.push_back(Upper);
}
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
// equivalent available.
return SDValue();
// Monotonic load/store is legal for all targets.
return Op;
}
static void ReplaceREADCYCLECOUNTER(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc DL(N);
// Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(9, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32)
};
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
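// The MRC reads only a 32-bit cycle count; pair it with zero to produce the
// i64 result expected by READCYCLECOUNTER.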
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
DAG.getConstant(0, DL, MVT::i32)));
Results.push_back(Cycles32.getValue(1));
}
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
dl, MVT::i32);
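// VLo/VHi hold the low and high 32-bit halves of the 64-bit value; the
// REG_SEQUENCE below glues them into a GPRPair for the 64-bit atomic pseudo.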
bool isBigEndian = DAG.getDataLayout().isBigEndian();
if (isBigEndian)
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
static void ReplaceCMP_SWAP_64Results(SDNode *N,
SmallVectorImpl<SDValue> & Results,
SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::i64 &&
"AtomicCmpSwap on types less than 64 should be legal");
SDValue Ops[] = {N->getOperand(1),
createGPRPairNode(DAG, N->getOperand(2)),
createGPRPairNode(DAG, N->getOperand(3)),
N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
MachineFunction &MF = DAG.getMachineFunction();
MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
bool isBigEndian = DAG.getDataLayout().isBigEndian();
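// Extract the two 32-bit halves of the result from the GPRPair, swapping
// the subregisters on big-endian targets.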
Results.push_back(
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 2));
}
static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
SelectionDAG &DAG) {
const auto &TLI = DAG.getTargetLoweringInfo();
assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
"Custom lowering is MSVCRT specific!");
SDLoc dl(Op);
SDValue Val = Op.getOperand(0);
MVT Ty = Val->getSimpleValueType(0);
SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
TLI.getPointerTy(DAG.getDataLayout()));
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Val;
Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsZExt = true;
Args.push_back(Entry);
Entry.Node = Exponent;
Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsZExt = true;
Args.push_back(Entry);
Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
// The in-chain to the call is the entry node. If we are emitting a
// tail call, the chain will be mutated if the node has a non-entry input
// chain.
SDValue InChain = DAG.getEntryNode();
SDValue TCChain = InChain;
const auto *F = DAG.getMachineFunction().getFunction();
bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
F->getReturnType() == LCRTy;
if (IsTC)
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
.setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
.setTailCall(IsTC);
std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
// Return the chain (the DAG root) if it is a tail call
return !CI.second.getNode() ? DAG.getRoot() : CI.first;
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
case ISD::SREM: return LowerREM(Op.getNode(), DAG);
case ISD::UREM: return LowerREM(Op.getNode(), DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
return LowerSDIV(Op, DAG);
case ISD::UDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
return LowerUDIV(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
return LowerXALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
unsigned Opc = 0;
if (IntNo == Intrinsic::arm_smlald)
Opc = ARMISD::SMLALD;
else if (IntNo == Intrinsic::arm_smlaldx)
Opc = ARMISD::SMLALDX;
else if (IntNo == Intrinsic::arm_smlsld)
Opc = ARMISD::SMLSLD;
else if (IntNo == Intrinsic::arm_smlsldx)
Opc = ARMISD::SMLSLDX;
else
return;
SDLoc dl(N);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(3),
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(3),
DAG.getConstant(1, dl, MVT::i32));
SDValue LongMul = DAG.getNode(Opc, dl,
DAG.getVTList(MVT::i32, MVT::i32),
N->getOperand(1), N->getOperand(2),
Lo, Hi);
Results.push_back(LongMul.getValue(0));
Results.push_back(LongMul.getValue(1));
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res;
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
case ISD::READ_REGISTER:
ExpandREAD_REGISTER(N, Results, DAG);
break;
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
case ISD::SREM:
case ISD::UREM:
Res = LowerREM(N, DAG);
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
Res = LowerDivRem(SDValue(N, 0), DAG);
assert(Res.getNumOperands() == 2 && "DivRem needs two values");
Results.push_back(Res.getValue(0));
Results.push_back(Res.getValue(1));
return;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
case ISD::UDIV:
case ISD::SDIV:
assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
Results);
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_64Results(N, Results, DAG);
return;
case ISD::INTRINSIC_WO_CHAIN:
return ReplaceLongIntrinsic(N, Results, DAG);
}
if (Res.getNode())
Results.push_back(Res);
}
//===----------------------------------------------------------------------===//
// ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported with SjLj");
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineConstantPool *MCP = MF->getConstantPool();
ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
const Function *F = MF->getFunction();
bool isThumb = Subtarget->isThumb();
bool isThumb2 = Subtarget->isThumb2();
unsigned PCLabelId = AFI->createPICLabelUId();
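// The PC reads 4 bytes ahead of the current instruction in Thumb mode and
// 8 bytes ahead in ARM mode; the constant-pool value compensates for this.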
unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
MachineMemOperand *FIMMOSt =
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOStore, 4, 4);
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
// ldr.n r5, LCPI1_1
// orr r5, r5, #1
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(0x01)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
.addReg(NewVReg3, RegState::Kill)
.addFrameIndex(FI)
.addImm(36) // &jbuf[1] :: pc
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
} else if (isThumb) {
// Incoming value: jbuf
// ldr.n r1, LCPI1_4
// add r1, pc
// mov r2, #1
// orrs r1, r2
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
.addReg(ARM::CPSR, RegState::Define)
.addImm(1)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill)
.add(predOps(ARMCC::AL));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
.addImm(0)
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
} else {
// Incoming value: jbuf
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
.addConstantPoolIndex(CPI)
.addImm(0)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId)
.add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
.addReg(NewVReg2, RegState::Kill)
.addFrameIndex(FI)
.addImm(36) // &jbuf[1] :: pc
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
}
}
void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineFrameInfo &MFI = MF->getFrameInfo();
int FI = MFI.getFunctionContextIndex();
const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
: &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
if (!BB->isEHPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
for (MachineBasicBlock::iterator
II = BB->begin(), IE = BB->end(); II != IE; ++II) {
if (!II->isEHLabel()) continue;
MCSymbol *Sym = II->getOperand(0).getMCSymbol();
if (!MF->hasCallSiteLandingPad(Sym)) continue;
SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
CallSiteNumToLPad[*CSI].push_back(&*BB);
MaxCSNum = std::max(MaxCSNum, *CSI);
}
break;
}
}
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock*> LPadList;
SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
for (SmallVectorImpl<MachineBasicBlock*>::iterator
II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
LPadList.push_back(*II);
InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
}
}
assert(!LPadList.empty() &&
"No landing pad destinations for the dispatch jump table!");
// Create the jump table and associated information.
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
// Create the MBBs for the dispatch code.
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
DispatchBB->setIsEHPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
if (Subtarget->isThumb())
trap_opcode = ARM::tTRAP;
else
trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
// Insert the dispatch MBBs into the function.
MF->insert(MF->end(), DispatchBB);
MF->insert(MF->end(), DispContBB);
MF->insert(MF->end(), TrapBB);
// Insert code into the entry block that creates and registers the function
// context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered. This can't work if the dispatch block
// is in a Thumb1 function and is linked with ARM code which uses the FP
// registers, as there is no way to preserve the FP registers in Thumb1 mode.
MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
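// Small landing-pad counts fit in the compare's immediate field; larger
// counts are first materialized with a MOVW/MOVT pair.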
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
.addReg(NewVReg1)
.addImm(LPadList.size())
.add(predOps(ARMCC::AL));
} else {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
.addReg(VReg1)
.addImm(NumLPads >> 16)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
.addReg(NewVReg1)
.addReg(VReg2)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
.addFrameIndex(FI)
.addImm(1)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
.addReg(NewVReg1)
.addImm(NumLPads)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
.add(predOps(ARMCC::AL));
BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
.addReg(NewVReg1)
.addReg(VReg1)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg1)
.addImm(2)
.add(predOps(ARMCC::AL));
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3)
.add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
.addReg(NewVReg4, RegState::Kill)
.addImm(0)
.addMemOperand(JTMMOLd)
.add(predOps(ARMCC::AL));
unsigned NewVReg6 = NewVReg5;
if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg3)
.add(predOps(ARMCC::AL));
}
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
.addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
.addReg(NewVReg1)
.addImm(NumLPads)
.add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
.addReg(VReg1)
.addImm(NumLPads >> 16)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
.addReg(NewVReg1)
.addReg(VReg2)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
.addReg(NewVReg1)
.addReg(VReg1, RegState::Kill)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg4)
.addImm(0)
.addMemOperand(JTMMOLd)
.add(predOps(ARMCC::AL));
if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg4)
.addJumpTableIndex(MJTI);
} else {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
.addReg(NewVReg5, RegState::Kill)
.addJumpTableIndex(MJTI);
}
}
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (MachineBasicBlock *BB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
BB->succ_end());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isEHPad()) {
BB->removeSuccessor(SMBB);
MBBLPads.push_back(SMBB);
}
}
BB->addSuccessor(DispatchBB, BranchProbability::getZero());
BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents the compiler
// from moving instructions to before the EH block, where they would never
// be executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
OI = II->operands_begin(), OE = II->operands_end();
OI != OE; ++OI) {
if (!OI->isReg()) continue;
DefRegs[OI->getReg()] = true;
}
MachineInstrBuilder MIB(*MF, &*II);
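// Only add registers the current mode can actually use: Thumb1 is limited
// to the low registers (tGPR), Thumb2 to tGPR/hGPR, and ARM mode to GPR.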
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
if (Subtarget->isThumb2() &&
!ARM::tGPRRegClass.contains(Reg) &&
!ARM::hGPRRegClass.contains(Reg))
continue;
if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
continue;
if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
continue;
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
for (SmallVectorImpl<MachineBasicBlock*>::iterator
I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
(*I)->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
}
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I != Succ)
return *I;
llvm_unreachable("Expecting a BB with two successors!");
}
/// Return the load opcode for a given load size. If load size >= 8,
/// a NEON opcode will be returned.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
if (LdSize >= 8)
return LdSize == 16 ? ARM::VLD1q32wb_fixed
: LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
if (IsThumb1)
return LdSize == 4 ? ARM::tLDRi
: LdSize == 2 ? ARM::tLDRHi
: LdSize == 1 ? ARM::tLDRBi : 0;
if (IsThumb2)
return LdSize == 4 ? ARM::t2LDR_POST
: LdSize == 2 ? ARM::t2LDRH_POST
: LdSize == 1 ? ARM::t2LDRB_POST : 0;
return LdSize == 4 ? ARM::LDR_POST_IMM
: LdSize == 2 ? ARM::LDRH_POST
: LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
}
/// Return the store opcode for a given store size. If store size >= 8,
/// a NEON opcode will be returned.
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
if (StSize >= 8)
return StSize == 16 ? ARM::VST1q32wb_fixed
: StSize == 8 ? ARM::VST1d32wb_fixed : 0;
if (IsThumb1)
return StSize == 4 ? ARM::tSTRi
: StSize == 2 ? ARM::tSTRHi
: StSize == 1 ? ARM::tSTRBi : 0;
if (IsThumb2)
return StSize == 4 ? ARM::t2STR_POST
: StSize == 2 ? ARM::t2STRH_POST
: StSize == 1 ? ARM::t2STRB_POST : 0;
return StSize == 4 ? ARM::STR_POST_IMM
: StSize == 2 ? ARM::STRH_POST
: StSize == 1 ? ARM::STRB_POST_IMM : 0;
}
/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned LdSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
assert(LdOpc != 0 && "Should have a load opcode");
if (LdSize >= 8) {
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// load + update AddrIn
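// Thumb1 has no post-indexed load, so emit a plain load followed by a
// tADDi8 that advances the address by LdSize.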
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
.add(t1CondCodeOp())
.addReg(AddrIn)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
} else if (IsThumb2) {
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
} else { // arm
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addReg(0)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
}
}
/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned StSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
assert(StOpc != 0 && "Should have a store opcode");
if (StSize >= 8) {
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(AddrIn)
.addImm(0)
.addReg(Data)
.add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// store + update AddrIn
BuildMI(*BB, Pos, dl, TII->get(StOpc))
.addReg(Data)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
.add(t1CondCodeOp())
.addReg(AddrIn)
.addImm(StSize)
.add(predOps(ARMCC::AL));
} else if (IsThumb2) {
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(Data)
.addReg(AddrIn)
.addImm(StSize)
.add(predOps(ARMCC::AL));
} else { // arm
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(Data)
.addReg(AddrIn)
.addReg(0)
.addImm(StSize)
.add(predOps(ARMCC::AL));
}
}
MachineBasicBlock *
ARMTargetLowering::EmitStructByval(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This pseudo instruction has 4 operands: dst, src, size, alignment.
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI.getOperand(0).getReg();
unsigned src = MI.getOperand(1).getReg();
unsigned SizeVal = MI.getOperand(2).getImm();
unsigned Align = MI.getOperand(3).getImm();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
const TargetRegisterClass *TRC = nullptr;
const TargetRegisterClass *VecTRC = nullptr;
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
bool IsThumb = Subtarget->isThumb();
if (Align & 1) {
UnitSize = 1;
} else if (Align & 2) {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
}
// If NEON can't be used, fall back to a 4-byte unit size.
if (UnitSize == 0)
UnitSize = 4;
}
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
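// For NEON copies, a 16-byte unit needs a D-register pair (one Q register
// viewed as two D registers); an 8-byte unit needs a single D register.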
if (IsNeon)
VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
: UnitSize == 8 ? &ARM::DPRRegClass
: nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
// Use LDR and STR to copy.
// [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
// [destOut] = STR_POST(scratch, destIn, UnitSize)
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
// Expand the pseudo op to a loop.
// thisMBB:
// ...
// movw varEnd, # --> with thumb2
// movt varEnd, #
// ldrcp varEnd, idx --> without thumb2
// fallthrough --> loopMBB
// loopMBB:
// PHI varPhi, varEnd, varLoop
// PHI srcPhi, src, srcLoop
// PHI destPhi, dst, destLoop
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
// subs varLoop, varPhi, #UnitSize
// bne loopMBB
// fallthrough --> exitMBB
// exitMBB:
// epilogue to handle left-over bytes
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
.addImm(LoopSize & 0xFFFF)
.add(predOps(ARMCC::AL));
if ((LoopSize & 0xFFFF0000) != 0)
BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
.addReg(Vtmp)
.addImm(LoopSize >> 16)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb)
BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.add(predOps(ARMCC::AL));
else
BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0)
.add(predOps(ARMCC::AL));
}
BB->addSuccessor(loopMBB);
// Generate the loop body:
// varPhi = PHI(varLoop, varEnd)
// srcPhi = PHI(srcLoop, src)
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
unsigned varLoop = MRI.createVirtualRegister(TRC);
unsigned varPhi = MRI.createVirtualRegister(TRC);
unsigned srcLoop = MRI.createVirtualRegister(TRC);
unsigned srcPhi = MRI.createVirtualRegister(TRC);
unsigned destLoop = MRI.createVirtualRegister(TRC);
unsigned destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
.addReg(varEnd).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
.addReg(srcLoop).addMBB(loopMBB)
.addReg(src).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
.addReg(destLoop).addMBB(loopMBB)
.addReg(dest).addMBB(entryBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
IsThumb1, IsThumb2);
emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
if (IsThumb1) {
BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
.add(t1CondCodeOp())
.addReg(varPhi)
.addImm(UnitSize)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
MIB.addReg(varPhi)
.addImm(UnitSize)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
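// condCodeOp() added a dummy cc_out operand; redirect it to CPSR and mark
// it as a def so this SUB sets the flags tested by the branch below.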
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
}
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
auto StartOfExit = exitMBB->begin();
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
assert(Subtarget->isTargetWindows() &&
"__chkstk is only supported on Windows");
assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
// __chkstk takes the number of words to allocate on the stack in R4, and
// returns the stack adjustment in number of bytes in R4. This will not
// clobber any other registers (other than the obvious lr).
//
// Although, technically, IP should be considered a register which may be
// clobbered, the call itself will not touch it. Windows on ARM is a pure
// thumb-2 environment, so there is no interworking required. As a result, we
// do not expect a veneer to be emitted by the linker, clobbering IP.
//
// Each module receives its own copy of __chkstk, so no import thunk is
// required, again, ensuring that IP is not clobbered.
//
// Finally, although some linkers may theoretically provide a trampoline for
// out of range calls (which is quite common due to a 32M range limitation of
// branches for Thumb), we can generate the long-call version via
// -mcmodel=large, alleviating the need for the trampoline which may clobber
// IP.
switch (TM.getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
.add(predOps(ARMCC::AL))
.addExternalSymbol("__chkstk")
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
case CodeModel::JITDefault: {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
.add(predOps(ARMCC::AL))
.addReg(Reg, RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
}
}
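// __chkstk leaves the number of bytes to allocate in R4; subtract it from
// SP to actually allocate the stack space.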
BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
.addReg(ARM::SP, RegState::Kill)
.addReg(ARM::R4, RegState::Kill)
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
MI.eraseFromParent();
return MBB;
}
MachineBasicBlock *
ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
MF->insert(++MBB->getIterator(), ContBB);
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(ContBB);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
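// Compare the checked operand against zero and branch to the trap block
// when it is zero; otherwise execution continues in ContBB.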
BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
.addReg(MI.getOperand(0).getReg())
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
.addReg(ARM::CPSR);
MI.eraseFromParent();
return ContBB;
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI.getOpcode()) {
default: {
MI.print(errs());
llvm_unreachable("Unexpected instr type to insert");
}
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
.add(MI.getOperand(1)) // Rn_wb
.add(MI.getOperand(2)) // Rn
.add(MI.getOperand(3)) // PredImm
.add(MI.getOperand(4)) // PredReg
.add(MI.getOperand(0)); // Rt
MI.eraseFromParent();
return BB;
}
// The Thumb2 pre-indexed stores have the same MI operands, they just
// define them differently in the .td files from the isel patterns, so
// they need pseudos.
case ARM::t2STR_preidx:
MI.setDesc(TII->get(ARM::t2STR_PRE));
return BB;
case ARM::t2STRB_preidx:
MI.setDesc(TII->get(ARM::t2STRB_PRE));
return BB;
case ARM::t2STRH_preidx:
MI.setDesc(TII->get(ARM::t2STRH_PRE));
return BB;
case ARM::STRi_preidx:
case ARM::STRBi_preidx: {
unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
: ARM::STRB_PRE_IMM;
// Decode the offset.
unsigned Offset = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
Offset = ARM_AM::getAM2Offset(Offset);
if (isSub)
Offset = -Offset;
MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
.add(MI.getOperand(0)) // Rn_wb
.add(MI.getOperand(1)) // Rt
.add(MI.getOperand(2)) // Rn
.addImm(Offset) // offset (skip GPR==zero_reg)
.add(MI.getOperand(5)) // pred
.add(MI.getOperand(6))
.addMemOperand(MMO);
MI.eraseFromParent();
return BB;
}
case ARM::STRr_preidx:
case ARM::STRBr_preidx:
case ARM::STRH_preidx: {
unsigned NewOpc;
switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected opcode!");
case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI.getNumOperands(); ++i)
MIB.add(MI.getOperand(i));
MI.eraseFromParent();
return BB;
}
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
BuildMI(BB, dl, TII->get(ARM::tBcc))
.addMBB(sinkMBB)
.addImm(MI.getOperand(3).getImm())
.addReg(MI.getOperand(4).getReg());
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
.addReg(MI.getOperand(1).getReg())
.addMBB(copy0MBB)
.addReg(MI.getOperand(2).getReg())
.addMBB(thisMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case ARM::BCCi64:
case ARM::BCCZi64: {
// If there is an unconditional branch to the other successor, remove it.
BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
// Compare both parts that make up the double comparison separately for
// equality.
bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
unsigned LHS1 = MI.getOperand(1).getReg();
unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS1)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
unsigned RHS1 = MI.getOperand(3).getReg();
unsigned RHS2 = MI.getOperand(4).getReg();
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS1)
.addReg(RHS1)
.add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS2).addReg(RHS2)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
}
MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
if (MI.getOperand(0).getImm() == ARMCC::NE)
std::swap(destMBB, exitMBB);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
if (isThumb2)
BuildMI(BB, dl, TII->get(ARM::t2B))
.addMBB(exitMBB)
.add(predOps(ARMCC::AL));
else
BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return BB;
case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
case ARM::ABS:
case ARM::t2ABS: {
// To insert an ABS instruction, we have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// source vreg to test against 0, the destination vreg to set,
// the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
// It transforms
// V1 = ABS V0
// into
// V2 = MOVS V0
// BCC (branch to SinkBB if V0 >= 0)
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
// SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator BBI = ++BB->getIterator();
MachineFunction *Fn = BB->getParent();
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
Fn->insert(BBI, RSBBB);
Fn->insert(BBI, SinkBB);
unsigned int ABSSrcReg = MI.getOperand(1).getReg();
unsigned int ABSDstReg = MI.getOperand(0).getReg();
bool ABSSrcKill = MI.getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode the S bit must not be specified if the source register is
// SP or PC, or if the destination register is SP, so restrict the register
// class.
unsigned NewRsbDstReg =
MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RSBBB);
BB->addSuccessor(SinkBB);
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
// insert a cmp at the end of BB
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(ABSSrcReg)
.addImm(0)
.add(predOps(ARMCC::AL));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
// insert rsbri in RSBBB
// Note: BCC and rsbri will be converted into predicated rsbmi
// by the if-conversion pass.
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
.addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
.addImm(0)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
// insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
.addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI.eraseFromParent();
// return last added BB
return SinkBB;
}
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
case ARM::WIN__CHKSTK:
return EmitLowered__chkstk(MI, BB);
case ARM::WIN__DBZCHK:
return EmitLowered__dbzchk(MI, BB);
}
}
/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
MachineInstr &MI, const SDNode *Node) {
bool isThumb1 = Subtarget->isThumb1Only();
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB(*MF, MI);
// If the new dst/src is unused, mark it as dead.
if (!Node->hasAnyUseOfValue(0)) {
MI.getOperand(0).setIsDead(true);
}
if (!Node->hasAnyUseOfValue(1)) {
MI.getOperand(1).setIsDead(true);
}
// The MEMCPY both defines and kills the scratch registers.
for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
if (MI.getOpcode() == ARM::MEMCPY) {
attachMEMCPYScratchRegs(Subtarget, MI, Node);
return;
}
const MCInstrDesc *MCID = &MI.getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
// e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() ==
MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
&& "converted opcode should be the same except for cc_out"
" (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
// On Thumb1, move all input operands to the end, then add the predicate
if (Subtarget->isThumb1Only()) {
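// Thumb1 encodings place cc_out right after the destination, so rotate
// the source operands to sit behind the cc_out we just appended.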
for (unsigned c = MCID->getNumOperands() - 4; c--;) {
MI.addOperand(MI.getOperand(1));
MI.RemoveOperand(1);
}
// Restore the ties
for (unsigned i = MI.getNumOperands(); i--;) {
const MachineOperand& op = MI.getOperand(i);
if (op.isReg() && op.isUse()) {
int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
if (DefIdx != -1)
MI.tieOperands(DefIdx, i);
}
}
MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
ccOutIdx = 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
// Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
MI.RemoveOperand(i);
break;
}
}
if (!definesCPSR) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
// Thumb1 instructions must have the S bit even if the CPSR is dead.
if (!Subtarget->isThumb1Only())
return;
}
// If this instruction was defined with an optional CPSR def and its dag node
// had a live implicit CPSR def, then activate the optional CPSR def.
MachineOperand &MO = MI.getOperand(ccOutIdx);
MO.setReg(ARM::CPSR);
MO.setIsDef(true);
}
//===----------------------------------------------------------------------===//
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
// Detects these expressions where cc is an i1 value:
//
// (select cc 0, y) [AllOnes=0]
// (select cc y, 0) [AllOnes=0]
// (zext cc) [AllOnes=0]
// (sext cc) [AllOnes=0/1]
// (select cc -1, y) [AllOnes=1]
// (select cc y, -1) [AllOnes=1]
//
// Invert is set when N is the null/all-ones constant for the CC == false case.
// OtherOp is set to the alternative value of N.
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
SDValue &CC, bool &Invert,
SDValue &OtherOp,
SelectionDAG &DAG) {
switch (N->getOpcode()) {
default: return false;
case ISD::SELECT: {
CC = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
if (isZeroOrAllOnes(N1, AllOnes)) {
Invert = false;
OtherOp = N2;
return true;
}
if (isZeroOrAllOnes(N2, AllOnes)) {
Invert = true;
OtherOp = N1;
return true;
}
return false;
}
case ISD::ZERO_EXTEND:
// (zext cc) can never be the all ones value.
if (AllOnes)
return false;
LLVM_FALLTHROUGH;
case ISD::SIGN_EXTEND: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
return false;
Invert = !AllOnes;
if (AllOnes)
// When looking for an AllOnes constant, N is an sext, and the 'other'
// value is 0.
OtherOp = DAG.getConstant(0, dl, VT);
else if (N->getOpcode() == ISD::ZERO_EXTEND)
// When looking for a 0 constant, N can be zext or sext.
OtherOp = DAG.getConstant(1, dl, VT);
else
OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
VT);
return true;
}
}
}
// Combine a constant select operand into its use:
//
// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
//
// The transform is rejected if the select doesn't have a constant operand that
// is null, or all ones when AllOnes is set.
//
// Also recognize sext/zext from i1:
//
// (add (zext cc), x) -> (select cc (add x, 1), x)
// (add (sext cc), x) -> (select cc (add x, -1), x)
//
// These transformations eventually create predicated instructions.
//
// @param N The node to transform.
// @param Slct The N operand that is a select.
// @param OtherOp The other N operand (x above).
// @param DCI Context.
// @param AllOnes Require the select constant to be all ones instead of null.
// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
TargetLowering::DAGCombinerInfo &DCI,
bool AllOnes = false) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue NonConstantVal;
SDValue CCOp;
bool SwapSelectOps;
if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
NonConstantVal, DAG))
return SDValue();
// Slct is now known to be the desired identity constant when CC is true.
SDValue TrueVal = OtherOp;
SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
OtherOp, NonConstantVal);
// Unless SwapSelectOps says CC should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
CCOp, TrueVal, FalseVal);
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
static
SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
return Result;
if (N1.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
return Result;
return SDValue();
}
static bool IsVUZPShuffleNode(SDNode *N) {
// VUZP shuffle node.
if (N->getOpcode() == ARMISD::VUZP)
return true;
// "VUZP" on i32 is an alias for VTRN.
if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
return true;
return false;
}
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Look for ADD(VUZP.0, VUZP.1).
if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
N0 == N1)
return SDValue();
// Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
if (!N->getValueType(0).is64BitVector())
return SDValue();
// Generate vpadd.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
SDNode *Unzip = N0.getNode();
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
TLI.getPointerTy(DAG.getDataLayout())));
Ops.push_back(Unzip->getOperand(0));
Ops.push_back(Unzip->getOperand(1));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
}
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Check for two extended operands.
if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
N1.getOpcode() == ISD::SIGN_EXTEND) &&
!(N0.getOpcode() == ISD::ZERO_EXTEND &&
N1.getOpcode() == ISD::ZERO_EXTEND))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
// Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
N00 == N10)
return SDValue();
// We only recognize Q register paddl here; this can't be reached until
// after type legalization.
if (!N00.getValueType().is64BitVector() ||
!N0.getValueType().is128BitVector())
return SDValue();
// Generate vpaddl.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
// Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
unsigned Opcode;
if (N0.getOpcode() == ISD::SIGN_EXTEND)
Opcode = Intrinsic::arm_neon_vpaddls;
else
Opcode = Intrinsic::arm_neon_vpaddlu;
Ops.push_back(DAG.getConstant(Opcode, dl,
TLI.getPointerTy(DAG.getDataLayout())));
EVT ElemTy = N00.getValueType().getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
N00.getOperand(0), N00.getOperand(1));
Ops.push_back(Concat);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
}
// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
// much easier to match.
static SDValue
AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Only perform this optimization after legalization and when NEON is
// available. We also expect both operands to be BUILD_VECTORs.
if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
|| N0.getOpcode() != ISD::BUILD_VECTOR
|| N1.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// Check output type since VPADDL operand elements can only be 8, 16, or 32.
EVT VT = N->getValueType(0);
if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
return SDValue();
// Check that the vector operands are of the right form.
// N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT operands,
// where N is the size of the formed vector.
// Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or
// even index such that we have a pairwise add pattern.
// Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec = N0->getOperand(0)->getOperand(0);
SDNode *V = Vec.getNode();
unsigned nextIndex = 0;
// For each pair of BUILD_VECTOR operands of the ADD, check whether each of
// their operands is an EXTRACT_VECTOR_ELT from the same vector with the
// appropriate index.
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
&& N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue ExtVec0 = N0->getOperand(i);
SDValue ExtVec1 = N1->getOperand(i);
// First operand is the vector, verify it's the same.
if (V != ExtVec0->getOperand(0).getNode() ||
V != ExtVec1->getOperand(0).getNode())
return SDValue();
// Second is the constant, verify it's correct.
ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
// For the constants, we want to see all the even or all the odd indices.
if (!C0 || !C1 || C0->getZExtValue() != nextIndex
|| C1->getZExtValue() != nextIndex+1)
return SDValue();
// Increment index.
nextIndex+=2;
} else
return SDValue();
}
// Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
// we're using the entire input vector, otherwise there's a size/legality
// mismatch somewhere.
if (nextIndex != Vec.getValueType().getVectorNumElements() ||
Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
return SDValue();
// Create VPADDL node.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
TLI.getPointerTy(DAG.getDataLayout())));
// Input is the vector.
Ops.push_back(Vec);
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
EVT inputLaneType = Vec.getValueType().getVectorElementType();
switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
default:
llvm_unreachable("Invalid vector element type for padd optimization.");
}
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
return DAG.getNode(ExtOp, dl, VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
if (V->getOpcode() == ISD::UMUL_LOHI ||
V->getOpcode() == ISD::SMUL_LOHI)
return V;
return SDValue();
}
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb()) {
if (!Subtarget->hasDSP())
return SDValue();
} else if (!Subtarget->hasV5TEOps())
return SDValue();
// SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
// accumulate the product into a 64-bit value. The 16-bit values will
// be sign extended somehow or SRA'd into 32-bit values
// (addc (adde (mul 16bit, 16bit), lo), hi)
SDValue Mul = AddcNode->getOperand(0);
SDValue Lo = AddcNode->getOperand(1);
if (Mul.getOpcode() != ISD::MUL) {
Lo = AddcNode->getOperand(0);
Mul = AddcNode->getOperand(1);
if (Mul.getOpcode() != ISD::MUL)
return SDValue();
}
SDValue SRA = AddeNode->getOperand(0);
SDValue Hi = AddeNode->getOperand(1);
if (SRA.getOpcode() != ISD::SRA) {
SRA = AddeNode->getOperand(1);
Hi = AddeNode->getOperand(0);
if (SRA.getOpcode() != ISD::SRA)
return SDValue();
}
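// An arithmetic shift right by 31 extracts the sign bits, i.e. the SRA
// reproduces the high half of the sign-extended 32-bit product.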
if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
if (Const->getZExtValue() != 31)
return SDValue();
} else
return SDValue();
if (SRA.getOperand(0) != Mul)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(AddcNode);
unsigned Opcode = 0;
SDValue Op0;
SDValue Op1;
if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
Opcode = ARMISD::SMLALBB;
Op0 = Mul.getOperand(0);
Op1 = Mul.getOperand(1);
} else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
Opcode = ARMISD::SMLALBT;
Op0 = Mul.getOperand(0);
Op1 = Mul.getOperand(1).getOperand(0);
} else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
Opcode = ARMISD::SMLALTB;
Op0 = Mul.getOperand(0).getOperand(0);
Op1 = Mul.getOperand(1);
} else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
Opcode = ARMISD::SMLALTT;
Op0 = Mul->getOperand(0).getOperand(0);
Op1 = Mul->getOperand(1).getOperand(0);
}
if (!Op0 || !Op1)
return SDValue();
SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
Op0, Op1, Lo, Hi);
// Replace the ADD nodes' uses with the MLA node's values.
SDValue HiMLALResult(SMLAL.getNode(), 1);
SDValue LoMLALResult(SMLAL.getNode(), 0);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
// Return original node to notify the driver to stop replacing.
SDValue resNode(AddcNode, 0);
return resNode;
}
static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Look for multiply add opportunities.
// The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
// each add node consumes a value from ISD::UMUL_LOHI and there is
// a glue link from the first add to the second add.
// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
// an S/UMLAL instruction.
//
//                  UMUL_LOHI
//                 / :lo    \ :hi
//                /          \          [no multiline comment]
//      loAdd -> ADDC        |
//                  \ :glue  /
//                   \      /
//                    ADDE <- hiAdd
//
assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
assert(AddeNode->getNumOperands() == 3 &&
AddeNode->getOperand(2).getValueType() == MVT::i32 &&
"ADDE node has the wrong inputs");
// Check that we have a glued ADDC node.
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
if (AddcNode->getOpcode() != ARMISD::ADDC)
return SDValue();
SDValue AddcOp0 = AddcNode->getOperand(0);
SDValue AddcOp1 = AddcNode->getOperand(1);
// Check if the two operands are from the same mul_lohi node.
if (AddcOp0.getNode() == AddcOp1.getNode())
return SDValue();
assert(AddcNode->getNumValues() == 2 &&
AddcNode->getValueType(0) == MVT::i32 &&
"Expect ADDC with two result values. First: i32");
// Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
// may be an SMLAL which multiplies two 16-bit values.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
// Check for the triangle shape.
SDValue AddeOp0 = AddeNode->getOperand(0);
SDValue AddeOp1 = AddeNode->getOperand(1);
// Make sure that the ADDE operands are not coming from the same node.
if (AddeOp0.getNode() == AddeOp1.getNode())
return SDValue();
// Find the MUL_LOHI node walking up ADDE's operands.
bool IsLeftOperandMUL = false;
SDValue MULOp = findMUL_LOHI(AddeOp0);
if (MULOp == SDValue())
MULOp = findMUL_LOHI(AddeOp1);
else
IsLeftOperandMUL = true;
if (MULOp == SDValue())
return SDValue();
// Figure out the right opcode.
unsigned Opc = MULOp->getOpcode();
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
// Figure out the high and low input values to the MLAL node.
SDValue* HiAdd = nullptr;
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
// Ensure that the ADDE uses the high result of ISD::SMUL_LOHI.
if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
return SDValue();
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
else
HiAdd = &AddeOp0;
// Ensure that LoMul and LowAdd are taken from the correct ISD::SMUL_LOHI
// node whose low result is fed to the ADDC we are checking.
if (AddcOp0 == MULOp.getValue(0)) {
LoMul = &AddcOp0;
LowAdd = &AddcOp1;
}
if (AddcOp1 == MULOp.getValue(0)) {
LoMul = &AddcOp1;
LowAdd = &AddcOp0;
}
if (!LoMul)
return SDValue();
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(LoMul->getOperand(0));
Ops.push_back(LoMul->getOperand(1));
Ops.push_back(*LowAdd);
Ops.push_back(*HiAdd);
SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADD nodes' uses with the MLA node's values.
SDValue HiMLALResult(MLALNode.getNode(), 1);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
SDValue LoMLALResult(MLALNode.getNode(), 0);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
// Return original node to notify the driver to stop replacing.
return SDValue(AddeNode, 0);
}
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// UMAAL is similar to UMLAL except that it adds two unsigned values.
// While trying to combine for the other MLAL nodes, first search for the
// chance to use UMAAL. Check if Addc uses a node which has already
// been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
// as the addend, and it's handled in PerformUMLALCombine.
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
// Check that we have a glued ADDC node.
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
if (AddcNode->getOpcode() != ARMISD::ADDC)
return SDValue();
// Find the converted UMAAL or quit if it doesn't exist.
SDNode *UmlalNode = nullptr;
SDValue AddHi;
if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
UmlalNode = AddcNode->getOperand(0).getNode();
AddHi = AddcNode->getOperand(1);
} else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
UmlalNode = AddcNode->getOperand(1).getNode();
AddHi = AddcNode->getOperand(0);
} else {
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
}
// The ADDC should be glued to an ADDE node, which uses the same UMLAL
// result as the ADDC, together with a zero operand.
if (!isNullConstant(UmlalNode->getOperand(3)))
return SDValue();
if ((isNullConstant(AddeNode->getOperand(0)) &&
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
isNullConstant(AddeNode->getOperand(1)))) {
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
UmlalNode->getOperand(2), AddHi };
SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADD nodes' uses with the UMAAL node's values.
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
// Return original node to notify the driver to stop replacing.
return SDValue(AddeNode, 0);
}
return SDValue();
}
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
return SDValue();
// Check that we have a pair of ADDC and ADDE as operands.
// Both addends of the ADDE must be zero.
SDNode* AddcNode = N->getOperand(2).getNode();
SDNode* AddeNode = N->getOperand(3).getNode();
if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
(AddeNode->getOpcode() == ARMISD::ADDE) &&
isNullConstant(AddeNode->getOperand(0)) &&
isNullConstant(AddeNode->getOperand(1)) &&
(AddeNode->getOperand(2).getNode() == AddcNode))
return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
DAG.getVTList(MVT::i32, MVT::i32),
{N->getOperand(0), N->getOperand(1),
AddcNode->getOperand(0), AddcNode->getOperand(1)});
else
return SDValue();
}
static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb1Only()) {
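// Thumb1 immediates are unsigned, so fold an add of a negative constant
// into a sub of its negation (and vice versa). INT_MIN is excluded
// because it cannot be negated in 32 bits.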
SDValue RHS = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
int32_t imm = C->getSExtValue();
if (imm < 0 && imm > INT_MIN) {
SDLoc DL(N);
RHS = DAG.getConstant(-imm, DL, MVT::i32);
unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
: ARMISD::ADDC;
return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
}
}
}
return SDValue();
}
static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb1Only()) {
SDValue RHS = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
int64_t imm = C->getSExtValue();
if (imm < 0) {
SDLoc DL(N);
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already
// accounts for part of the negation.
RHS = DAG.getConstant(~imm, DL, MVT::i32);
unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
: ARMISD::ADDE;
return DAG.getNode(Opcode, DL, N->getVTList(),
N->getOperand(0), RHS, N->getOperand(2));
}
}
}
return SDValue();
}
/// PerformADDECombine - Target-specific dag combine transform from
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
static SDValue PerformADDECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Only ARM and Thumb2 support UMLAL/SMLAL.
if (Subtarget->isThumb1Only())
return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget){
// Attempt to create vpadd for this add.
if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
return Result;
// Attempt to create vpaddl for this add.
if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;
if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
return Result;
return SDValue();
}
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
// If that didn't work, try again with the operands commuted.
return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
if (N1.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
return Result;
return SDValue();
}
/// PerformVMULCombine
/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
/// special multiplier accumulator forwarding.
/// vmul d3, d0, d2
/// vmla d3, d1, d2
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
// However, for (A + B) * (A + B),
// vadd d2, d0, d1
// vmul d3, d0, d2
// vmla d3, d1, d2
// is slower than
// vadd d2, d0, d1
// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasVMLxForwarding())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB) {
Opcode = N1.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB)
return SDValue();
std::swap(N0, N1);
}
if (N0 == N1)
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
return DAG.getNode(Opcode, DL, VT,
DAG.getNode(ISD::MUL, DL, VT, N00, N1),
DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
if (Subtarget->isThumb1Only())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
EVT VT = N->getValueType(0);
if (VT.is64BitVector() || VT.is128BitVector())
return PerformVMULCombine(N, DCI, Subtarget);
if (VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
SDLoc DL(N);
SDValue Res;
MulAmt >>= ShiftAmt;
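// e.g. MulAmt == 20 (0b10100): ShiftAmt == 2 and the reduced MulAmt is
// 5 == 2^2 + 1, so we emit (shl (add x, (shl x, 2)), 2).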
if (MulAmt >= 0) {
if (isPowerOf2_32(MulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmt - 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmt + 1), DL,
MVT::i32)),
V);
} else
return SDValue();
} else {
uint64_t MulAmtAbs = -MulAmt;
if (isPowerOf2_32(MulAmtAbs + 1)) {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
Res = DAG.getNode(ISD::SUB, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmtAbs - 1)) {
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
MVT::i32)));
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, MVT::i32), Res);
} else
return SDValue();
}
if (ShiftAmt != 0)
Res = DAG.getNode(ISD::SHL, DL, VT,
Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
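// Illustrative sketch (not part of the upstream source): PerformMULCombine
// factors a constant multiplier as (2^N +/- 1) << ShiftAmt and rebuilds the
// product from a shift plus an add or sub. A scalar model of the positive
// cases; the helper name is ours:
static inline unsigned MulByConstModel(unsigned X, unsigned ShiftAmt,
                                       unsigned N, bool PlusOne) {
  // MulAmt == (2^N + 1) << ShiftAmt  ->  ((X << N) + X) << ShiftAmt
  // MulAmt == (2^N - 1) << ShiftAmt  ->  ((X << N) - X) << ShiftAmt
  unsigned Inner = PlusOne ? (X << N) + X : (X << N) - X;
  return Inner << ShiftAmt;
}
// For example, X * 10 == X * ((4 + 1) << 1) becomes ((X << 2) + X) << 1.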
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VbicVT;
SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VbicVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
}
}
}
if (!Subtarget->isThumb1Only()) {
// fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;
}
return SDValue();
}
// Try combining OR nodes to SMULWB, SMULWT.
static SDValue PerformORCombineToSMULWBT(SDNode *OR,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasV6Ops() ||
(Subtarget->isThumb() &&
(!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
return SDValue();
SDValue SRL = OR->getOperand(0);
SDValue SHL = OR->getOperand(1);
if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
SRL = OR->getOperand(1);
SHL = OR->getOperand(0);
}
if (!isSRL16(SRL) || !isSHL16(SHL))
return SDValue();
// The first operands to the shifts need to be the two results from the
// same smul_lohi node.
if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
return SDValue();
SDNode *SMULLOHI = SRL.getOperand(0).getNode();
if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
SHL.getOperand(0) != SDValue(SMULLOHI, 1))
return SDValue();
// Now we have:
// (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
// For SMULW[B|T], smul_lohi takes a 32-bit and a 16-bit argument.
// For SMULWB the 16-bit value will be sign-extended in some way.
// For SMULWT only the SRA is required.
// Check both sides of SMUL_LOHI.
SDValue OpS16 = SMULLOHI->getOperand(0);
SDValue OpS32 = SMULLOHI->getOperand(1);
SelectionDAG &DAG = DCI.DAG;
if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
OpS16 = OpS32;
OpS32 = SMULLOHI->getOperand(0);
}
SDLoc dl(OR);
unsigned Opcode = 0;
if (isS16(OpS16, DAG))
Opcode = ARMISD::SMULWB;
else if (isSRA16(OpS16)) {
Opcode = ARMISD::SMULWT;
OpS16 = OpS16->getOperand(0);
}
else
return SDValue();
SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
return SDValue(OR, 0);
}
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN && Subtarget->hasNEON() &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
}
}
}
if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
return Result;
}
// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
// Ensure that the second operand of both ands are constants
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
// Ensure that the bit width of the constants are the same and that
// the splat arguments are logical inverses as per the pattern we
// are trying to simplify.
if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
}
}
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
// BFI is only available on V6T2+
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & ~mask) == val, i.e., val does not overlap the preserved bits
//
// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
// && mask == ~mask2
// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
// && ~mask == mask2
// (i.e., copy a bitfield value into another bitfield of the same width)
if (VT != MVT::i32)
return SDValue();
SDValue N00 = N0.getOperand(0);
// The value and the mask need to be constants so we can verify this is
// actually a bitfield set. If the mask is 0xffff, we can do better
// via a movt instruction, so don't use BFI in that case.
SDValue MaskOp = N0.getOperand(1);
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
if (!MaskC)
return SDValue();
unsigned Mask = MaskC->getZExtValue();
if (Mask == 0xffff)
return SDValue();
SDValue Res;
// Case (1): or (and A, mask), val => ARMbfi A, val, mask
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C) {
unsigned Val = N1C->getZExtValue();
if ((Val & ~Mask) != Val)
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, DL, MVT::i32),
DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
} else if (N1.getOpcode() == ISD::AND) {
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
unsigned Mask2 = N11C->getZExtValue();
// Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
// as is to match.
if (ARM::isBitFieldInvertedMask(Mask) &&
(Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasDSP() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasDSP() &&
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
DAG.getConstant(Mask2, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
}
if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
ARM::isBitFieldInvertedMask(~Mask)) {
// Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
unsigned LSB = countTrailingZeros(Mask);
if (ShAmtC != LSB)
return SDValue();
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
DAG.getConstant(~Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
}
return SDValue();
}
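// Illustrative sketch (not part of the upstream source): a scalar model of
// what ARMISD::BFI computes in case (1) above. The node's third operand holds
// the bits *preserved* from the first operand, so the written field is ~Mask,
// and the combine pre-shifts Val down so its field starts at bit 0. Assumes
// ~Mask is nonzero and a GCC/Clang-style __builtin_ctz.
static inline unsigned BFIScalarModel(unsigned A, unsigned Val, unsigned Mask) {
  unsigned Lsb = __builtin_ctz(~Mask);         // position of the inserted field
  return (A & Mask) | ((Val << Lsb) & ~Mask);  // keep A's Mask bits, insert Val
}
// E.g. A == 0xAABBCCDD, Mask == 0xFFFF00FF, Val == 0xEE yields 0xAABBEEDD.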
static SDValue PerformXORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (!Subtarget->isThumb1Only()) {
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
return SDValue();
}
// ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn) and
// return it, and fill in FromMask and ToMask with the (consecutive) bits in
// "from" to be extracted and their positions in "to" (Rd).
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
assert(N->getOpcode() == ARMISD::BFI);
SDValue From = N->getOperand(1);
ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
// If the Base came from a SHR #C, we can deduce that it is really testing bit
// #C in the base of the SHR.
if (From->getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(From->getOperand(1))) {
APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
assert(Shift.getLimitedValue() < 32 && "Shift too large!");
FromMask <<= Shift.getLimitedValue(31);
From = From->getOperand(0);
}
return From;
}
// Given that A and B each contain a single contiguous run of set bits, does
// A | B form their concatenation A . B (i.e., B's run sits immediately below
// A's)?
//
// Neither A nor B may be zero.
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
unsigned LastActiveBitInA = A.countTrailingZeros();
unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
return LastActiveBitInA - 1 == FirstActiveBitInB;
}
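// Worked example (illustrative, not part of the upstream source): the check
// above asks whether B's highest set bit sits immediately below A's lowest
// set bit. With A == 0b111000 and B == 0b000111, LastActiveBitInA == 3 and
// FirstActiveBitInB == 2, so 3 - 1 == 2 and the two fields concatenate with
// no gap and no overlap. With B == 0b000011, bit 2 would be left unset and
// the check fails.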
static SDValue FindBFIToCombineWith(SDNode *N) {
// We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can
// combine with, if one exists.
APInt ToMask, FromMask;
SDValue From = ParseBFI(N, ToMask, FromMask);
SDValue To = N->getOperand(0);
// Now check for a compatible BFI to merge with. We can pass through BFIs that
// aren't compatible, but not if they set the same bit in their destination as
// we do (or that of any BFI we're going to combine with).
SDValue V = To;
APInt CombinedToMask = ToMask;
while (V.getOpcode() == ARMISD::BFI) {
APInt NewToMask, NewFromMask;
SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
if (NewFrom != From) {
// This BFI has a different base. Keep going.
CombinedToMask |= NewToMask;
V = V.getOperand(0);
continue;
}
// Do the written bits conflict with any we've seen so far?
if ((NewToMask & CombinedToMask).getBoolValue())
// Conflicting bits - bail out because going further is unsafe.
return SDValue();
// Are the new bits contiguous when combined with the old bits?
if (BitsProperlyConcatenate(ToMask, NewToMask) &&
BitsProperlyConcatenate(FromMask, NewFromMask))
return V;
if (BitsProperlyConcatenate(NewToMask, ToMask) &&
BitsProperlyConcatenate(NewFromMask, FromMask))
return V;
// We've seen a write to some bits, so track it.
CombinedToMask |= NewToMask;
// Keep going...
V = V.getOperand(0);
}
return SDValue();
}
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
// (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
// the bits being cleared by the AND are not demanded by the BFI.
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned LSB = countTrailingZeros(~InvMask);
unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
assert(Width <
static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
"undefined behavior");
unsigned Mask = (1u << Width) - 1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
} else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
// We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
// Keep track of any consecutive bits set that all come from the same base
// value. We can combine these together into a single BFI.
SDValue CombineBFI = FindBFIToCombineWith(N);
if (CombineBFI == SDValue())
return SDValue();
// We've found a BFI.
APInt ToMask1, FromMask1;
SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
APInt ToMask2, FromMask2;
SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
assert(From1 == From2);
(void)From2;
// First, unlink CombineBFI.
DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
// Then create a new BFI, combining the two together.
APInt NewFromMask = FromMask1 | FromMask2;
APInt NewToMask = ToMask1 | ToMask2;
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (NewFromMask[0] == 0)
From1 = DCI.DAG.getNode(
ISD::SRL, dl, VT, From1,
DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
DCI.DAG.getConstant(~NewToMask, dl, VT));
}
return SDValue();
}
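// Worked example for the BFI chain merge above (illustrative, not part of the
// upstream source). Given the chain
//   t1 = BFI(base, from,         ~0x000000F0)   ; from[3:0] -> bits 7:4
//   t2 = BFI(t1,   srl(from, 4), ~0x00000F00)   ; from[7:4] -> bits 11:8
// ParseBFI reports ToMask/FromMask pairs (0xF0, 0x0F) and (0xF00, 0xF0); both
// pairs concatenate properly, so the chain collapses to the single node
//   BFI(base, from, ~0x00000FF0)                ; from[7:0] -> bits 11:4
// and no extra SRL is needed because NewFromMask already starts at bit 0.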
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
SDNode *InNode = InDouble.getNode();
if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
InNode->getValueType(0) == MVT::f64 &&
InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
!cast<LoadSDNode>(InNode)->isVolatile()) {
// TODO: Should this be done for non-FrameIndex operands?
LoadSDNode *LD = cast<LoadSDNode>(InNode);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 =
DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
LD->getAlignment(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(
MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
std::swap(NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
return Result;
}
return SDValue();
}
/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
// N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() == ISD::BITCAST)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::BITCAST)
Op1 = Op1.getOperand(0);
if (Op0.getOpcode() == ARMISD::VMOVRRD &&
Op0.getNode() == Op1.getNode() &&
Op0.getResNo() == 0 && Op1.getResNo() == 1)
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Op0.getOperand(0));
return SDValue();
}
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
/// directly into a VFP register.
static bool hasNormalLoadOperand(SDNode *N) {
unsigned NumElts = N->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
return true;
}
return false;
}
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
// but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
SelectionDAG &DAG = DCI.DAG;
if (N->getNumOperands() == 2)
if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
return RV;
// Load i64 elements as f64 values so that type legalization does not split
// them up into i32 values.
EVT VT = N->getValueType(0);
if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
return SDValue();
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
Ops.push_back(V);
// Make the DAGCombiner fold the bitcast.
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static SDValue
PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
// At that time, we may have inserted bitcasts from integer to float.
// If these bitcasts have survived DAGCombine, change the lowering of this
// BUILD_VECTOR in something more vector friendly, i.e., that does not
// force to use floating point types.
// Make sure we can change the type of the vector.
// This is possible iff:
// 1. The vector is only used in a bitcast to a integer type. I.e.,
// 1.1. Vector is used only once.
// 1.2. Use is a bit convert to an integer type.
// 2. The size of its operands are 32-bits (64-bits are not legal).
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
// Check 1.1. and 2.
if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
return SDValue();
// By construction, the input type must be float.
assert(EltVT == MVT::f32 && "Unexpected type!");
// Check 1.2.
SDNode *Use = *N->use_begin();
if (Use->getOpcode() != ISD::BITCAST ||
Use->getValueType(0).isFloatingPoint())
return SDValue();
// Check profitability.
// Model is, if more than half of the relevant operands are bitcast from
// i32, turn the build_vector into a sequence of insert_vector_elt.
// Relevant operands are everything that is not statically
// (i.e., at compile time) bitcasted.
unsigned NumOfBitCastedElts = 0;
unsigned NumElts = VT.getVectorNumElements();
unsigned NumOfRelevantElts = NumElts;
for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
SDValue Elt = N->getOperand(Idx);
if (Elt->getOpcode() == ISD::BITCAST) {
// Assume only bit cast to i32 will go away.
if (Elt->getOperand(0).getValueType() == MVT::i32)
++NumOfBitCastedElts;
} else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
// Constants are statically casted, thus do not count them as
// relevant operands.
--NumOfRelevantElts;
}
// Check if more than half of the elements require a non-free bitcast.
if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
// Create the new vector type.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
// Check if the type is legal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(VecVT))
return SDValue();
// Combine:
// ARMISD::BUILD_VECTOR E1, E2, ..., EN.
// => BITCAST INSERT_VECTOR_ELT
// (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
// (BITCAST EN), N.
SDValue Vec = DAG.getUNDEF(VecVT);
SDLoc dl(N);
for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
SDValue V = N->getOperand(Idx);
if (V.isUndef())
continue;
if (V.getOpcode() == ISD::BITCAST &&
V->getOperand(0).getValueType() == MVT::i32)
// Fold obvious case.
V = V.getOperand(0);
else {
V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
}
Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
return Vec;
}
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Bitcast an i64 load inserted into a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
EVT VT = N->getValueType(0);
SDNode *Elt = N->getOperand(1).getNode();
if (VT.getVectorElementType() != MVT::i64 ||
!ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
VT.getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
DCI.AddToWorklist(V.getNode());
SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
Vec, V, N->getOperand(2));
return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
// The LLVM shufflevector instruction does not require the shuffle mask
// length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
// have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
// operands do not match the mask length, they are extended by concatenating
// them with undef vectors. That is probably the right thing for other
// targets, but for NEON it is better to concatenate two double-register
// size vector operands into a single quad-register size vector. Do that
// transformation here:
// shuffle(concat(v1, undef), concat(v2, undef)) ->
// shuffle(concat(v1, v2), undef)
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
Op1.getOpcode() != ISD::CONCAT_VECTORS ||
Op0.getNumOperands() != 2 ||
Op1.getNumOperands() != 2)
return SDValue();
SDValue Concat0Op1 = Op0.getOperand(1);
SDValue Concat1Op1 = Op1.getOperand(1);
if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
return SDValue();
// Skip the transformation if any of the types are illegal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
if (!TLI.isTypeLegal(VT) ||
!TLI.isTypeLegal(Concat0Op1.getValueType()) ||
!TLI.isTypeLegal(Concat1Op1.getValueType()))
return SDValue();
SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
Op0.getOperand(0), Op1.getOperand(0));
// Translate the shuffle mask.
SmallVector<int, 16> NewMask;
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfElts = NumElts/2;
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned n = 0; n < NumElts; ++n) {
int MaskElt = SVN->getMaskElt(n);
int NewElt = -1;
if (MaskElt < (int)HalfElts)
NewElt = MaskElt;
else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
NewElt = HalfElts + MaskElt - NumElts;
NewMask.push_back(NewElt);
}
return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
DAG.getUNDEF(VT), NewMask);
}
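// Illustrative sketch (not part of the upstream source): the mask remapping
// above as a standalone helper (the name is ours). Lanes that read v1 keep
// their index, lanes that read v2 move into the top half of the new single
// concat, and lanes that read the undef padding become -1 (undef).
static inline int RemapShuffleLane(int MaskElt, unsigned NumElts) {
  unsigned HalfElts = NumElts / 2;
  if (MaskElt >= 0 && MaskElt < (int)HalfElts)
    return MaskElt;                              // still reads v1
  if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
    return (int)(HalfElts + MaskElt - NumElts);  // reads v2's new position
  return -1;                                     // read an undef half
}
// E.g. with NumElts == 4, the mask <0, 1, 4, 5> becomes <0, 1, 2, 3>.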
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
/// For generic load/stores, the memory type is assumed to be a vector.
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
const bool isStore = N->getOpcode() == ISD::STORE;
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
MemSDNode *MemN = cast<MemSDNode>(N);
SDLoc dl(N);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
continue;
// Find the new opcode for the updating load/store.
bool isLoadOp = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
NumVecs = 2; break;
case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
NumVecs = 4; break;
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
NumVecs = 2; isLaneOp = true; break;
case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
NumVecs = 3; isLaneOp = true; break;
case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
NumVecs = 1; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
NumVecs = 2; isLoadOp = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
NumVecs = 3; isLoadOp = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
NumVecs = 4; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode for Neon base update");
case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; isLaneOp = false; break;
case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
if (isLoadOp) {
VecTy = N->getValueType(0);
} else if (isIntrinsic) {
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
} else {
assert(isStore && "Node has to be a load, a store, or an intrinsic!");
VecTy = N->getOperand(1).getValueType();
}
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
}
// OK, we found an ADD we can fold into the base update.
// Now, create a _UPD node, taking care of not breaking alignment.
EVT AlignedVecTy = VecTy;
unsigned Alignment = MemN->getAlignment();
// If this is a less-than-standard-aligned load/store, change the type to
// match the standard alignment.
// The alignment is overlooked when selecting _UPD variants; and it's
// easier to introduce bitcasts here than fix that.
// There are 3 ways to get to this base-update combine:
// - intrinsics: they are assumed to be properly aligned (to the standard
// alignment of the memory type), so we don't need to do anything.
// - ARMISD::VLDx nodes: they are only generated from the aforementioned
// intrinsics, so, likewise, there's nothing to do.
// - generic load/store instructions: the alignment is specified as an
// explicit operand, rather than implicitly as the standard alignment
// of the memory type (like the intrisics). We need to change the
// memory type to match the explicit alignment. That way, we don't
// generate non-standard-aligned ARMISD::VLDx nodes.
if (isa<LSBaseSDNode>(N)) {
if (Alignment == 0)
Alignment = 1;
if (Alignment < VecTy.getScalarSizeInBits() / 8) {
MVT EltTy = MVT::getIntegerVT(Alignment * 8);
assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
assert(!isLaneOp && "Unexpected generic load/store lane.");
unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
}
// Don't set an explicit alignment on regular load/stores that we want
// to transform to VLD/VST 1_UPD nodes.
// This matches the behavior of regular load/stores, which only get an
// explicit alignment if the MMO alignment is larger than the standard
// alignment of the memory type.
// Intrinsics, however, always get an explicit alignment, set to the
// alignment of the MMO.
Alignment = 1;
}
// Create the new updating load/store node.
// First, create an SDVTList for the new updating node's results.
EVT Tys[6];
unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
// Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
// Try to match the intrinsic's signature
Ops.push_back(StN->getValue());
} else {
// Loads (and of course intrinsics) match the intrinsics' signature,
// so just add all but the alignment operand.
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
Ops.push_back(N->getOperand(i));
}
// For all node types, the alignment operand is always the last one.
Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
// If this is a non-standard-aligned STORE, the penultimate operand is the
// stored value. Bitcast it to the aligned type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
SDValue &StVal = Ops[Ops.size()-2];
StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
}
EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
MemN->getMemOperand());
// Update the uses.
SmallVector<SDValue, 5> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i)
NewResults.push_back(SDValue(UpdN.getNode(), i));
// If this is a non-standard-aligned LOAD, the first result is the loaded
// value. Bitcast it to the expected result type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
SDValue &LdVal = NewResults[0];
LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
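// Worked example for the sizing logic above (illustrative, not part of the
// upstream source): a vld2.32 of two v2i32 vectors has NumVecs == 2 and
// VecTy.getSizeInBits() == 64, so NumBytes == 2 * 64 / 8 == 16, and an ADD of
// the address by 16 folds into the post-incremented form
//   vld2.32 {d0, d1}, [r0]!
// For a lane op, NumBytes is further divided by the lane count, since only
// one element per vector is transferred.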
static SDValue PerformVLDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
return CombineBaseUpdate(N, DCI);
}
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
/// return true.
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// vldN-dup instructions only support 64-bit vectors for N > 1.
if (!VT.is64BitVector())
return false;
// Check if the VDUPLANE operand is a vldN-dup intrinsic.
SDNode *VLD = N->getOperand(0).getNode();
if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
return false;
unsigned NumVecs = 0;
unsigned NewOpc = 0;
unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
if (IntNo == Intrinsic::arm_neon_vld2lane) {
NumVecs = 2;
NewOpc = ARMISD::VLD2DUP;
} else if (IntNo == Intrinsic::arm_neon_vld3lane) {
NumVecs = 3;
NewOpc = ARMISD::VLD3DUP;
} else if (IntNo == Intrinsic::arm_neon_vld4lane) {
NumVecs = 4;
NewOpc = ARMISD::VLD4DUP;
} else {
return false;
}
// First check that all the vldN-lane uses are VDUPLANEs and that the lane
// numbers match the load.
unsigned VLDLaneNo =
cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
// Ignore uses of the chain result.
if (UI.getUse().getResNo() == NumVecs)
continue;
SDNode *User = *UI;
if (User->getOpcode() != ARMISD::VDUPLANE ||
VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
return false;
}
// Create the vldN-dup node.
EVT Tys[5];
unsigned n;
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
Ops, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
unsigned ResNo = UI.getUse().getResNo();
// Ignore uses of the chain result.
if (ResNo == NumVecs)
continue;
SDNode *User = *UI;
DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
}
// Now the vldN-lane intrinsic is dead except for its chain result.
// Update uses of the chain.
std::vector<SDValue> VLDDupResults;
for (unsigned n = 0; n < NumVecs; ++n)
VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
DCI.CombineTo(VLD, VLDDupResults);
return true;
}
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Op = N->getOperand(0);
// If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
// of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
if (CombineVLDDUP(N, DCI))
return SDValue(N, 0);
// If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
// redundant. Ignore bit_converts for now; element sizes are checked below.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
return SDValue();
// Make sure the VMOV element size is not bigger than the VDUPLANE elements.
unsigned EltSize = Op.getScalarValueSizeInBits();
// The canonical VMOV for a zero vector uses a 32-bit element size.
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned EltBits;
if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
EltSize = 8;
EVT VT = N->getValueType(0);
if (EltSize > VT.getScalarSizeInBits())
return SDValue();
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformVDUPCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue Op = N->getOperand(0);
// Match VDUP(LOAD) -> VLD1DUP.
// We match this pattern here rather than waiting for isel because the
// transform is only legal for unindexed loads.
LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
if (LD && Op.hasOneUse() && LD->isUnindexed() &&
LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
Ops, LD->getMemoryVT(),
LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
return VLDDup;
}
return SDValue();
}
static SDValue PerformLOADCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
// If this is a legal vector load, try to combine it into a VLD1_UPD.
if (ISD::isNormalLoad(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
StoreSDNode *St = cast<StoreSDNode>(N);
if (St->isVolatile())
return SDValue();
// Optimize trunc store (of multiple scalars) to shuffle and store. First,
// pack all of the elements in one place. Next, store to memory in fewer
// chunks.
SDValue StVal = St->getValue();
EVT VT = StVal.getValueType();
if (St->isTruncatingStore() && VT.isVector()) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT StVT = St->getMemoryVT();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromEltSz = VT.getScalarSizeInBits();
unsigned ToEltSz = StVT.getScalarSizeInBits();
// The From and To element sizes and the element count must be powers of two.
if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
unsigned SizeRatio = FromEltSz / ToEltSz;
assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i)
ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
? (i + 1) * SizeRatio - 1
: i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
DAG.getUNDEF(WideVec.getValueType()),
ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to store it to memory.
// Find the largest legal store unit.
MVT StoreType = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
StoreType = Tp;
}
// Didn't find a legal store type.
if (!TLI.isTypeLegal(StoreType))
return SDValue();
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue BasePtr = St->getBasePtr();
// Perform one or more big stores into memory.
unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
for (unsigned I = 0; I < E; I++) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
StoreType, ShuffWide,
DAG.getIntPtrConstant(I, DL));
SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
Increment);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
if (!ISD::isNormalStore(St))
return SDValue();
// Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
// ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(
St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
BasePtr, St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(),
std::min(4U, St->getAlignment() / 2),
St->getMemOperand()->getFlags());
}
if (StVal.getValueType() == MVT::i64 &&
StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// Bitcast an i64 store extracted from a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(StVal);
SDValue IntVec = StVal.getOperand(0);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
IntVec.getValueType().getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
Vec, StVal.getOperand(1));
dl = SDLoc(N);
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags(), St->getAAInfo());
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
if (ISD::isNormalStore(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
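// Worked example for the truncating-store path above (illustrative, not part
// of the upstream source): truncating a v4i32 store to v4i16 gives
// FromEltSz == 32, ToEltSz == 16, SizeRatio == 2. The value is bitcast to
// v8i16 and shuffled with mask <0, 2, 4, 6, ...> so the four kept halfwords
// (the low half of each i32 on little-endian) are packed at the bottom of the
// register; the packed 64 bits are then written with a single i64 store
// instead of four separate halfword stores.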
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
/// vmul.f32 d16, d17, d16
/// vcvt.s32.f32 d16, d16
/// becomes:
/// vcvt.s32.f32 d16, d16, #3
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
// be lossy. We also can't handle more than 4 lanes, since these instructions
// only support v2i32/v4i32 types.
return SDValue();
}
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
if (C == -1 || C == 0 || C > 32)
return SDValue();
SDLoc dl(N);
bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
SDValue FixConv = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
DAG.getConstant(C, dl, MVT::i32));
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
}
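// Illustrative sketch (not part of the upstream source): the combine rests on
// the identity (int)(x * 2^C) == vcvt(x, #C), i.e. a fixed-point convert with
// C fraction bits. A scalar model (the sketch assumes C < 32; the combine
// itself allows up to 32):
static inline int FixedPointConvertModel(float X, unsigned C) {
  // vcvt.s32.f32 d, d, #C scales by 2^C before truncating toward zero.
  return (int)(X * (float)(1u << C));
}
// E.g. X == 2.5f and C == 3 gives (int)(2.5f * 8.0f) == 20.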
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
/// vcvt.f32.s32 d16, d16
/// vdiv.f32 d16, d17, d16
/// becomes:
/// vcvt.f32.s32 d16, d16, #3
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
// be lossy. We also can't handle more than 4 lanes, since these instructions
// only support v2i32/v4i32 types.
return SDValue();
}
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
if (C == -1 || C == 0 || C > 32)
return SDValue();
SDLoc dl(N);
bool isSigned = OpOpcode == ISD::SINT_TO_FP;
SDValue ConvInput = Op.getOperand(0);
if (IntBits < FloatBits)
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. For a shift opcode, the value
/// is positive, but for an intrinsic the count must be negative. The
/// absolute value must be in the range:
/// 1 <= |Value| <= ElementBits for a right shift; or
/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
if (!isIntrinsic)
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
Cnt = -Cnt;
return true;
}
return false;
}
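// Illustrative ranges (not part of the upstream source) for v4i16 lanes,
// where ElementBits == 16:
//   plain left shift:      0 <= Cnt <= 15   (vshl.i16 #0..#15)
//   long left shift:       0 <= Cnt <= 16   (vshll.s16 allows #16)
//   right shift:           1 <= Cnt <= 16   (vshr.s16 #1..#16)
//   narrowing right shift: 1 <= Cnt <= 8    (vshrn.i16 #1..#8)
// Intrinsic call sites encode right-shift counts as negative values, which
// isVShiftRImm flips back to the positive immediate.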
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
// Don't do anything for most intrinsics.
break;
// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because
// the build_vectors for 64-bit vector element shift counts are generally
// not legal, and it is hard to see their values after they get legalized to
// loads from a constant pool.
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
case Intrinsic::arm_neon_vrshiftn:
case Intrinsic::arm_neon_vqshifts:
case Intrinsic::arm_neon_vqshiftu:
case Intrinsic::arm_neon_vqshiftsu:
case Intrinsic::arm_neon_vqshiftns:
case Intrinsic::arm_neon_vqshiftnu:
case Intrinsic::arm_neon_vqshiftnsu:
case Intrinsic::arm_neon_vqrshiftns:
case Intrinsic::arm_neon_vqrshiftnu:
case Intrinsic::arm_neon_vqrshiftnsu: {
EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
switch (IntNo) {
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
VShiftOpc = ARMISD::VSHL;
break;
}
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
ARMISD::VSHRs : ARMISD::VSHRu);
break;
}
return SDValue();
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
break;
return SDValue();
case Intrinsic::arm_neon_vqshifts:
case Intrinsic::arm_neon_vqshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
break;
return SDValue();
case Intrinsic::arm_neon_vqshiftsu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
break;
llvm_unreachable("invalid shift count for vqshlu intrinsic");
case Intrinsic::arm_neon_vrshiftn:
case Intrinsic::arm_neon_vqshiftns:
case Intrinsic::arm_neon_vqshiftnu:
case Intrinsic::arm_neon_vqshiftnsu:
case Intrinsic::arm_neon_vqrshiftns:
case Intrinsic::arm_neon_vqrshiftnu:
case Intrinsic::arm_neon_vqrshiftnsu:
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
llvm_unreachable("invalid shift count for narrowing vector shift "
"intrinsic");
default:
llvm_unreachable("unhandled vector shift");
}
switch (IntNo) {
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
// Opcode already set above.
break;
case Intrinsic::arm_neon_vrshifts:
VShiftOpc = ARMISD::VRSHRs; break;
case Intrinsic::arm_neon_vrshiftu:
VShiftOpc = ARMISD::VRSHRu; break;
case Intrinsic::arm_neon_vrshiftn:
VShiftOpc = ARMISD::VRSHRN; break;
case Intrinsic::arm_neon_vqshifts:
VShiftOpc = ARMISD::VQSHLs; break;
case Intrinsic::arm_neon_vqshiftu:
VShiftOpc = ARMISD::VQSHLu; break;
case Intrinsic::arm_neon_vqshiftsu:
VShiftOpc = ARMISD::VQSHLsu; break;
case Intrinsic::arm_neon_vqshiftns:
VShiftOpc = ARMISD::VQSHRNs; break;
case Intrinsic::arm_neon_vqshiftnu:
VShiftOpc = ARMISD::VQSHRNu; break;
case Intrinsic::arm_neon_vqshiftnsu:
VShiftOpc = ARMISD::VQSHRNsu; break;
case Intrinsic::arm_neon_vqrshiftns:
VShiftOpc = ARMISD::VQRSHRNs; break;
case Intrinsic::arm_neon_vqrshiftnu:
VShiftOpc = ARMISD::VQRSHRNu; break;
case Intrinsic::arm_neon_vqrshiftnsu:
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vshiftins: {
EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
VShiftOpc = ARMISD::VSLI;
else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
VShiftOpc = ARMISD::VSRI;
else {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
N->getOperand(1), N->getOperand(2),
DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vqrshifts:
case Intrinsic::arm_neon_vqrshiftu:
// No immediate versions of these to check for.
break;
}
return SDValue();
}
/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them. As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
// Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
// 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
SDValue N1 = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
SDValue N0 = N->getOperand(0);
if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
DAG.MaskedValueIsZero(N0.getOperand(0),
APInt::getHighBitsSet(32, 16)))
return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
}
}
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
SDLoc dl(N);
return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
break;
case ISD::SRA:
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
}
return SDValue();
}
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDValue N0 = N->getOperand(0);
// Check for sign- and zero-extensions of vector extract operations of 8-
// and 16-bit vector elements. NEON supports these directly. They are
// handled during DAG combining because type legalization will promote them
// to 32-bit types and it is messy to recognize the operations after that.
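// e.g. (sext (extract_vector_elt v8i16:d0, 3)) becomes a VGETLANEs node,
// which should select to a single lane move such as "vmov.s16 r0, d0[3]"
// rather than a lane move followed by a separate sign extension.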
if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Vec = N0.getOperand(0);
SDValue Lane = N0.getOperand(1);
EVT VT = N->getValueType(0);
EVT EltVT = N0.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (VT == MVT::i32 &&
(EltVT == MVT::i8 || EltVT == MVT::i16) &&
TLI.isTypeLegal(Vec.getValueType()) &&
isa<ConstantSDNode>(Lane)) {
unsigned Opc = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode");
case ISD::SIGN_EXTEND:
Opc = ARMISD::VGETLANEs;
break;
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Opc = ARMISD::VGETLANEu;
break;
}
return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
}
}
return SDValue();
}
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
// y |= CM;
//
// And:
// * CN is a single bit;
// * All bits covered by CM are known zero in y
//
// Then we can convert this into a sequence of BFI instructions. This will
// always be a win if CM is a single bit, will always be no worse than the
// TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
// three bits (due to the extra IT instruction).
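// e.g. with CN = 0x4 and CM = 0x300, X is first shifted right by 2 so the
// tested bit lands in bit 0, and two single-bit BFIs then insert it at
// bits 8 and 9 of y, both of which must already be known zero.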
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
auto CC = CCNode->getAPIntValue().getLimitedValue();
SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
if (!isNullConstant(CmpZ->getOperand(1)))
return SDValue();
assert(CmpZ->getOpcode() == ARMISD::CMPZ);
SDValue And = CmpZ->getOperand(0);
if (And->getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
if (!AndC || !AndC->getAPIntValue().isPowerOf2())
return SDValue();
SDValue X = And->getOperand(0);
if (CC == ARMCC::EQ) {
// We're performing an "equal to zero" compare. Swap the operands so we
// canonicalize on a "not equal to zero" compare.
std::swap(Op0, Op1);
} else {
assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
if (Op1->getOpcode() != ISD::OR)
return SDValue();
ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
if (!OrC)
return SDValue();
SDValue Y = Op1->getOperand(0);
if (Op0 != Y)
return SDValue();
// Now, is it profitable to continue?
APInt OrCI = OrC->getAPIntValue();
unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
if (OrCI.countPopulation() > Heuristic)
return SDValue();
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
KnownBits Known;
DAG.computeKnownBits(Y, Known);
if ((OrCI & Known.Zero) != OrCI)
return SDValue();
// OK, we can do the combine.
SDValue V = Y;
SDLoc dl(X);
EVT VT = X.getValueType();
unsigned BitInX = AndC->getAPIntValue().logBase2();
if (BitInX != 0) {
// We must shift X first.
X = DAG.getNode(ISD::SRL, dl, VT, X,
DAG.getConstant(BitInX, dl, VT));
}
for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
BitInY < NumActiveBits; ++BitInY) {
if (OrCI[BitInY] == 0)
continue;
APInt Mask(VT.getSizeInBits(), 0);
Mask.setBit(BitInY);
V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
// Confusingly, the operand is an *inverted* mask.
DAG.getConstant(~Mask, dl, VT));
}
return V;
}
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at NE cases.
return SDValue();
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue Chain = N->getOperand(0);
SDValue BB = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
// -> (brcond Chain BB CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
LHS->getOperand(0)->hasOneUse()) {
auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
if ((LHS00C && LHS00C->getZExtValue() == 0) &&
(LHS01C && LHS01C->getZExtValue() == 1) &&
(LHS1C && LHS1C->getZExtValue() == 1) &&
(RHSC && RHSC->getZExtValue() == 0)) {
return DAG.getNode(
ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
}
}
return SDValue();
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
return SDValue();
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue FalseVal = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// BFI is only available on V6T2+.
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
SDValue R = PerformCMOVToBFICombine(N, DAG);
if (R)
return R;
}
// Simplify
// mov r1, r0
// cmp r1, x
// mov r0, y
// moveq r0, x
// to
// cmp r0, x
// movne r0, y
//
// mov r1, r0
// cmp r1, x
// mov r0, x
// movne r0, y
// to
// cmp r0, x
// movne r0, y
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
N->getOperand(3), Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
SDValue ARMcc;
SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
N->getOperand(3), NewCmp);
}
// (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
// -> (cmov F T CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
if ((LHS0C && LHS0C->getZExtValue() == 0) &&
(LHS1C && LHS1C->getZExtValue() == 1) &&
(RHSC && RHSC->getZExtValue() == 0)) {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
LHS->getOperand(2), LHS->getOperand(3),
LHS->getOperand(4));
}
}
if (Res.getNode()) {
KnownBits Known;
DAG.computeKnownBits(SDValue(N,0), Known);
// Capture demanded bits information that would be otherwise lost.
if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i1));
else if (Known.Zero == 0xffffff00)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i8));
else if (Known.Zero == 0xffff0000)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i16));
}
return Res;
}
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
return SDValue();
break;
}
case ARMISD::SMULWT: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
return SDValue();
break;
}
case ARMISD::SMLALBB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALBT: {
unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALTB: {
unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALTT: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
return SDValue();
break;
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
return PerformVLDCombine(N, DCI);
default: break;
}
break;
}
return SDValue();
}
bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const {
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
switch (VT.getSimpleVT().SimpleTy) {
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32: {
// Unaligned accesses can use (for example) LDRB, LDRH and LDR.
if (AllowsUnaligned) {
if (Fast)
*Fast = Subtarget->hasV7Ops();
return true;
}
return false;
}
case MVT::f64:
case MVT::v2f64: {
// For any little-endian target with NEON, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses.
if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
*Fast = true;
return true;
}
return false;
}
}
}
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
unsigned AlignCheck) {
return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
(DstAlign == 0 || DstAlign % AlignCheck == 0));
}
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
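// e.g. a 16-byte copy with 16-byte-aligned source and destination picks
// MVT::v2f64 below, so the expansion can use Q-register-sized accesses; a
// misaligned copy may still do so if the subtarget reports misaligned
// v2f64 accesses as fast.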
if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
(allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
Fast))) {
return MVT::f64;
}
}
// Let the target-independent logic figure it out.
return MVT::Other;
}
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
if (Val.getOpcode() != ISD::LOAD)
return false;
EVT VT1 = Val.getValueType();
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
// 8-bit and 16-bit loads implicitly zero-extend to 32 bits.
return true;
}
return false;
}
bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
EVT VT = ExtVal.getValueType();
if (!isTypeLegal(VT))
return false;
// Don't create a loadext if we can fold the extension into a wide/long
// instruction.
// If there's more than one user instruction, the loadext is desirable no
// matter what. There can be two uses by the same instruction.
if (ExtVal->use_empty() ||
!ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
return true;
SDNode *U = *ExtVal->use_begin();
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
return false;
return true;
}
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
if (isLegalAddressingMode(DL, AM, Ty, AS)) {
if (Subtarget->hasFPAO())
return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
return 0;
}
return -1;
}
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
unsigned Scale = 1;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
// Scale == 1;
break;
case MVT::i16:
// Scale == 2;
Scale = 2;
break;
case MVT::i32:
// Scale == 4;
Scale = 4;
break;
}
if ((V & (Scale - 1)) != 0)
return false;
V /= Scale;
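// The scaled offset must fit in a 5-bit unsigned immediate, so e.g. for
// MVT::i32 the legal offsets are 0, 4, ..., 124.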
return V == (V & ((1LL << 5) - 1));
}
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
bool isNeg = false;
if (V < 0) {
isNeg = true;
V = - V;
}
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
// + imm12 or - imm8
if (isNeg)
return V == (V & ((1LL << 8) - 1));
return V == (V & ((1LL << 12) - 1));
case MVT::f32:
case MVT::f64:
// Same as ARM mode. FIXME: NEON?
if (!Subtarget->hasVFP2())
return false;
if ((V & 3) != 0)
return false;
V >>= 2;
return V == (V & ((1LL << 8) - 1));
}
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
if (V == 0)
return true;
if (!VT.isSimple())
return false;
if (Subtarget->isThumb1Only())
return isLegalT1AddressImmediate(V, VT);
else if (Subtarget->isThumb2())
return isLegalT2AddressImmediate(V, VT, Subtarget);
// ARM mode.
if (V < 0)
V = - V;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i32:
// +- imm12
return V == (V & ((1LL << 12) - 1));
case MVT::i16:
// +- imm8
return V == (V & ((1LL << 8) - 1));
case MVT::f32:
case MVT::f64:
if (!Subtarget->hasVFP2()) // FIXME: NEON?
return false;
if ((V & 3) != 0)
return false;
V >>= 2;
return V == (V & ((1LL << 8) - 1));
}
}
bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
EVT VT) const {
int Scale = AM.Scale;
if (Scale < 0)
return false;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
if (Scale == 1)
return true;
// r + r << imm
Scale = Scale & ~1;
return Scale == 2 || Scale == 4 || Scale == 8;
case MVT::i64:
// r + r
if (((unsigned)AM.HasBaseReg + Scale) <= 2)
return true;
return false;
case MVT::isVoid:
// Note, we allow "void" uses (basically, uses that aren't loads or
// stores), because arm allows folding a scale into many arithmetic
// operations. This should be made more precise and revisited later.
// Allow r << imm, but the imm has to be a multiple of two.
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
// Can never fold addr of global into load/store.
if (AM.BaseGV)
return false;
switch (AM.Scale) {
case 0: // no scale reg, must be "r+i" or "r", or "i".
break;
case 1:
if (Subtarget->isThumb1Only())
return false;
LLVM_FALLTHROUGH;
default:
// ARM doesn't support any R+R*scale+imm addr modes.
if (AM.BaseOffs)
return false;
if (!VT.isSimple())
return false;
if (Subtarget->isThumb2())
return isLegalT2ScaledAddressingMode(AM, VT);
int Scale = AM.Scale;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i32:
if (Scale < 0) Scale = -Scale;
if (Scale == 1)
return true;
// r + r << imm
return isPowerOf2_32(Scale & ~1);
case MVT::i16:
case MVT::i64:
// r + r
if (((unsigned)AM.HasBaseReg + Scale) <= 2)
return true;
return false;
case MVT::isVoid:
// Note, we allow "void" uses (basically, uses that aren't loads or
// stores), because arm allows folding a scale into many arithmetic
// operations. This should be made more precise and revisited later.
// Allow r << imm, but the imm has to be a multiple of two.
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
}
return true;
}
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
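// e.g. "cmp r0, #-42" is not encodable, but "cmn r0, #42" sets the same
// flags, which is why std::abs(Imm) is checked below.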
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn and supports only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
/// isLegalAddImmediate - Return true if the specified immediate is a legal add
/// *or sub* immediate, that is the target has add or sub instructions which can
/// add the immediate to a register without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
return ARM_AM::getT2SOImmVal(AbsImm) != -1;
// Thumb1 only has 8-bit unsigned immediate.
return AbsImm >= 0 && AbsImm <= 255;
}
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
// AddressingMode 3
Base = Ptr->getOperand(0);
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -256) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
isInc = (Ptr->getOpcode() == ISD::ADD);
Offset = Ptr->getOperand(1);
return true;
} else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
// AddressingMode 2
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -0x1000) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
Base = Ptr->getOperand(0);
return true;
}
}
if (Ptr->getOpcode() == ISD::ADD) {
isInc = true;
ARM_AM::ShiftOpc ShOpcVal=
ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
Base = Ptr->getOperand(1);
Offset = Ptr->getOperand(0);
} else {
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
}
return true;
}
isInc = (Ptr->getOpcode() == ISD::ADD);
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
return true;
}
// FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
return false;
}
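// A small example for both helpers: for (add ptr, -8), RHSC = -8 lies in
// the negative immediate range, so Base = ptr, Offset is the constant +8,
// and isInc = false, i.e. an indexed access that decrements the base by 8.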
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
Base = Ptr->getOperand(0);
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
} else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
isInc = Ptr->getOpcode() == ISD::ADD;
Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
return false;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
if (Subtarget->isThumb1Only())
return false;
EVT VT;
SDValue Ptr;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
} else
return false;
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
if (!isLegal)
return false;
AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
isNonExt = !ST->isTruncatingStore();
} else
return false;
if (Subtarget->isThumb1Only()) {
// Thumb-1 can do a limited post-inc load or store as an updating LDM. It
// must be non-extending/truncating, i32, with an offset of 4.
assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
if (Op->getOpcode() != ISD::ADD || !isNonExt)
return false;
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!RHS || RHS->getZExtValue() != 4)
return false;
Offset = Op->getOperand(1);
Base = Op->getOperand(0);
AM = ISD::POST_INC;
return true;
}
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
return false;
if (Ptr != Base) {
// Swap base ptr and offset to catch more post-index load / store when
// it's legal. In Thumb2 mode, offset must be an immediate.
if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
!Subtarget->isThumb2())
std::swap(Base, Offset);
// Post-indexed load / store update the base pointer.
if (Ptr != Base)
return false;
}
AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case ARMISD::ADDC:
case ARMISD::ADDE:
case ARMISD::SUBC:
case ARMISD::SUBE:
// These nodes' second result is a boolean
if (Op.getResNo() == 0)
break;
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
if (Known.isUnknown())
return;
KnownBits KnownRHS;
DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
Known.Zero &= KnownRHS.Zero;
Known.One &= KnownRHS.One;
return;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
}
case ARMISD::BFI: {
// Conservatively, we can recurse down the first operand
// and just mask out all affected bits.
DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
const APInt &Mask = CI->getAPIntValue();
Known.Zero &= Mask;
Known.One &= Mask;
return;
}
}
}
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
// Looking for "rev" which is V6+.
if (!Subtarget->hasV6Ops())
return false;
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
AsmStr = AsmPieces[0];
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t,");
// rev $0, $1
if (AsmPieces.size() == 3 &&
AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (Ty && Ty->getBitWidth() == 32)
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
}
return false;
}
const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in a register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasVFP2())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
case 'h': return C_RegisterClass;
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
case 'j': return C_Other; // Constant for movw.
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as an 'r' memory constraint.
case 'Q': return C_Memory;
}
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default: break;
// All 'U+' constraints are addresses.
case 'U': return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
ARMTargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'l':
if (type->isIntegerTy()) {
if (Subtarget->isThumb())
weight = CW_SpecificReg;
else
weight = CW_Register;
}
break;
case 'w':
if (type->isFloatingPointTy())
weight = CW_Register;
break;
}
return weight;
}
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
if (Subtarget->isThumb1Only())
return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPRRegClass);
if (VT.getSizeInBits() == 128)
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
if (VT == MVT::Other)
break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_8RegClass);
if (VT.getSizeInBits() == 128)
return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
break;
}
}
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'j':
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
int64_t CVal64 = C->getSExtValue();
int CVal = (int) CVal64;
// None of these constraints allow values larger than 32 bits. Check
// that the value fits in an int.
if (CVal != CVal64)
return;
switch (ConstraintLetter) {
case 'j':
// Constant suitable for movw, must be between 0 and
// 65535.
if (Subtarget->hasV6T2Ops())
if (CVal >= 0 && CVal <= 65535)
break;
return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
// immediates.
if (CVal >= 0 && CVal <= 255)
break;
} else if (Subtarget->isThumb2()) {
// A constant that can be used as an immediate value in a
// data-processing instruction.
if (ARM_AM::getT2SOImmVal(CVal) != -1)
break;
} else {
// A constant that can be used as an immediate value in a
// data-processing instruction.
if (ARM_AM::getSOImmVal(CVal) != -1)
break;
}
return;
case 'J':
if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
// not useful otherwise but is implemented for compatibility.
if (CVal >= -255 && CVal <= -1)
break;
} else {
// This must be a constant between -4095 and 4095. It is not clear
// what this constraint is intended for. Implemented for
// compatibility with GCC.
if (CVal >= -4095 && CVal <= 4095)
break;
}
return;
case 'K':
if (Subtarget->isThumb1Only()) {
// A 32-bit value where only one byte has a nonzero value. Exclude
// zero to match GCC. This constraint is used by GCC internally for
// constants that can be loaded with a move/shift combination.
// It is not useful otherwise but is implemented for compatibility.
if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
break;
} else if (Subtarget->isThumb2()) {
// A constant whose bitwise inverse can be used as an immediate
// value in a data-processing instruction. This can be used in GCC
// with a "B" modifier that prints the inverted value, for use with
// BIC and MVN instructions. It is not useful otherwise but is
// implemented for compatibility.
if (ARM_AM::getT2SOImmVal(~CVal) != -1)
break;
} else {
// A constant whose bitwise inverse can be used as an immediate
// value in a data-processing instruction. This can be used in GCC
// with a "B" modifier that prints the inverted value, for use with
// BIC and MVN instructions. It is not useful otherwise but is
// implemented for compatibility.
if (ARM_AM::getSOImmVal(~CVal) != -1)
break;
}
return;
case 'L':
if (Subtarget->isThumb1Only()) {
// This must be a constant in the range [-7, 7) for 3-operand
// ADD/SUB immediate instructions.
if (CVal >= -7 && CVal < 7)
break;
} else if (Subtarget->isThumb2()) {
// A constant whose negation can be used as an immediate value in a
// data-processing instruction. This can be used in GCC with an "n"
// modifier that prints the negated value, for use with SUB
// instructions. It is not useful otherwise but is implemented for
// compatibility.
if (ARM_AM::getT2SOImmVal(-CVal) != -1)
break;
} else {
// A constant whose negation can be used as an immediate value in a
// data-processing instruction. This can be used in GCC with an "n"
// modifier that prints the negated value, for use with SUB
// instructions. It is not useful otherwise but is implemented for
// compatibility.
if (ARM_AM::getSOImmVal(-CVal) != -1)
break;
}
return;
case 'M':
if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
break;
} else {
// A power of two or a constant between 0 and 32. This is used in
// GCC for the shift amount on shifted register operands, but it is
// useful in general for any shift amounts.
if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
break;
}
return;
case 'N':
if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a constant between 0 and 31, for shift amounts.
if (CVal >= 0 && CVal <= 31)
break;
}
return;
case 'O':
if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a multiple of 4 between -508 and 508, for
// ADD/SUB sp = sp + immediate.
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
break;
}
return;
}
Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
static RTLIB::Libcall getDivRemLibcall(
const SDNode *N, MVT::SimpleValueType SVT) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemLibcall");
bool isSigned = N->getOpcode() == ISD::SDIVREM ||
N->getOpcode() == ISD::SREM;
RTLIB::Libcall LC;
switch (SVT) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
return LC;
}
static TargetLowering::ArgListTy getDivRemArgList(
const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemArgList");
bool isSigned = N->getOpcode() == ISD::SDIVREM ||
N->getOpcode() == ISD::SREM;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
EVT ArgVT = N->getOperand(i).getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*Context);
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.IsSExt = isSigned;
Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
if (Subtarget->isTargetWindows() && Args.size() >= 2)
std::swap(Args[0], Args[1]);
return Args;
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
"Invalid opcode for Div/Rem lowering");
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
SDLoc dl(Op);
// If the target has hardware divide, use divide + multiply + subtract:
// div = a / b
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
const SDValue Divisor = Op->getOperand(1);
SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
SDValue Values[2] = {Div, Rem};
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
}
RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
VT.getSimpleVT().SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
DAG.getContext(),
Subtarget);
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = StructType::get(Ty, Ty);
if (Subtarget->isTargetWindows())
InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
// Lowers REM using divmod helpers; see RTABI sections 4.2/4.3.
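// e.g. a 32-bit signed srem becomes a call returning {quotient, remainder};
// on AEABI targets the callee is typically __aeabi_idivmod, and only the
// second element of the returned struct is used here.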
SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
// Build return types (div and rem)
std::vector<Type*> RetTyParams;
Type *RetTyElement;
switch (N->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
}
RetTyParams.push_back(RetTyElement);
RetTyParams.push_back(RetTyElement);
ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
Type *RetTy = StructType::get(*DAG.getContext(), ret);
RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
Subtarget);
bool isSigned = N->getOpcode() == ISD::SREM;
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
if (Subtarget->isTargetWindows())
InChain = WinDBZCheckDenominator(DAG, N, InChain);
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
.setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// Return second (rem) result operand (first contains div)
SDNode *ResNode = CallResult.first.getNode();
assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
return ResNode->getOperand(1);
}
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "unsupported target platform");
SDLoc DL(Op);
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
DAG.getConstant(2, DL, MVT::i32));
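// Windows' __chkstk takes the allocation size in 4-byte words in r4, hence
// the srl by 2 above; the adjusted stack pointer is read back from sp below.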
SDValue Flag;
Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
Chain = NewSP.getValue(1);
SDValue Ops[2] = { NewSP, Chain };
return DAG.getMergeValues(Ops, DL);
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_EXTEND");
RTLIB::Libcall LC =
RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOperand(0).getValueType() == MVT::f64 &&
Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_ROUND");
RTLIB::Libcall LC =
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
SDLoc(Op)).first;
}
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
return false;
}
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
return false;
// There can be ones on either or both "outsides"; all the "inside"
// bits must be zeros.
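// e.g. 0xF000000F is accepted (~v == 0x0FFFFFF0, one contiguous run of
// ones), while 0xF0F0F0F0 is rejected.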
return isShiftedMask_32(~v);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (!Subtarget->hasVFP3())
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
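// e.g. a vld3 returning { <8 x i8>, <8 x i8>, <8 x i8> } spans 192 bits,
// giving NumElts = 3 and memVT = v3i64.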
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile loads with NEON intrinsics not supported
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile stores with NEON intrinsics not supported
Info.readMem = false;
Info.writeMem = true;
return true;
}
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = 8;
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
case Intrinsic::arm_ldaexd:
case Intrinsic::arm_ldrexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 8;
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
return true;
default:
break;
}
return false;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
if (Bits == 0 || Bits > 32)
return false;
return true;
}
bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
// First, if the target has no DMB, see what fallback we can use.
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
return Builder.CreateCall(MCR, args);
} else {
// Instead of using barriers, atomic accesses on these subtargets use
// libcalls.
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
return Builder.CreateCall(DMB, CDomain);
}
}
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/non-atomic");
case AtomicOrdering::Monotonic:
case AtomicOrdering::Acquire:
return nullptr; // Nothing to do
case AtomicOrdering::SequentiallyConsistent:
if (!Inst->hasAtomicStore())
return nullptr; // Nothing to do
/*FALLTHROUGH*/
case AtomicOrdering::Release:
case AtomicOrdering::AcquireRelease:
if (Subtarget->preferISHSTBarriers())
return makeDMB(Builder, ARM_MB::ISHST);
// FIXME: add a comment with a link to documentation justifying this.
else
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}
Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/not-atomic");
case AtomicOrdering::Monotonic:
case AtomicOrdering::Release:
return nullptr; // Nothing to do
case AtomicOrdering::Acquire:
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
return (Size == 64) && !Subtarget->isMClass();
}
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
// guarantee, see DDI0406C ARM architecture reference manual,
// sections A8.8.72-74 LDRD)
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
: AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
? AtomicExpansionKind::LLSC
: AtomicExpansionKind::None;
}
bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
bool hasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
}
bool ARMTargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
return InsertFencesForAtomic;
}
// This has so far only been implemented for MachO.
bool ARMTargetLowering::useLoadStackGuardNode() const {
return Subtarget->isTargetMachO();
}
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const {
// If we do not have NEON, vector types are not natively supported.
if (!Subtarget->hasNEON())
return false;
// Floating point values and vector values map to the same register file.
// Therefore, although we could do a store extract of a vector type, this is
// better to leave at float as we have more freedom in the addressing mode for
// those.
if (VectorTy->isFPOrFPVectorTy())
return false;
// If the index is unknown at compile time, this is very expensive to lower
// and it is not possible to combine the store with the extract.
if (!isa<ConstantInt>(Idx))
return false;
assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
// We can do a store + vector extract on any vector that fits perfectly in a D
// or Q register.
if (BitWidth == 64 || BitWidth == 128) {
Cost = 0;
return true;
}
return false;
}
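// Illustrative example of the checks above (not exhaustive): storing
// extractelement <4 x i32> %v, i32 2 qualifies, since the vector is integer,
// 128 bits wide, and the index is constant, so Cost is reported as 0 and the
// extract can be folded into a single lane store. A <4 x float> source or a
// variable index is declined.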
bool ARMTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget->hasV6T2Ops();
}
bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasV6T2Ops();
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
// single i64 here.
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
Function *Ldrex = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldrex, Addr),
cast<PointerType>(Addr->getType())->getElementType());
}
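// A minimal sketch of the IR the 64-bit path above builds (value names are
// invented for illustration):
//   %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)
//   %lo = extractvalue { i32, i32 } %lohi, 0
//   %hi = extractvalue { i32, i32 } %lohi, 1
//   %lo64 = zext i32 %lo to i64
//   %hi64 = zext i32 %hi to i64
//   %hi.shl = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %hi.shl
// On big-endian subtargets, Lo and Hi are swapped before the recombination.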
void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
if (!Subtarget->hasV7Ops())
return;
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal types, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
Function *Strex = Intrinsic::getDeclaration(M, Int);
Type *Int32Ty = Type::getInt32Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Strex, {Lo, Hi, Addr});
}
Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateCall(
Strex, {Builder.CreateZExtOrBitCast(
Val, Strex->getFunctionType()->getParamType(0)),
Addr});
}
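// The corresponding sketch for the 64-bit store-conditional path above
// (again with invented names):
//   %lo = trunc i64 %val to i32
//   %hi.shr = lshr i64 %val, 32
//   %hi = trunc i64 %hi.shr to i32
//   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)
// strexd returns 0 on success and 1 if the exclusive monitor was cleared,
// so the expansion loops until the returned status is 0.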
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const {
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
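// For example, a <16 x i32> vector is 512 bits wide, so this reports
// (512 + 127) / 128 = 4 interleaved accesses, while a 64-bit <8 x i8>
// rounds up to a single access.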
bool ARMTargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the vector doesn't have f16 elements. Even though we could do an
// i16 vldN, we can't hold the f16 vectors and will end up converting via
// f32.
if (VecTy->getElementType()->isHalfTy())
return false;
// Ensure the number of vector elements is greater than 1.
if (VecTy->getNumElements() < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32)
return false;
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
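// A few concrete cases under the rules above (illustrative): <8 x i8>
// (64 bits) and <4 x i32> (128 bits) are legal, and <16 x i32> (512 bits)
// is legal but will be split into multiple accesses; <4 x half> fails the
// f16 check and <2 x i64> fails the element-size check (ElSize == 64).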
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
bool ARMTargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
VectorType *VecTy = Shuffles[0]->getType();
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long
// as the vector size is divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
// A pointer vector cannot be the return type of the ldN intrinsics. We need
// to load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
VecTy = VectorType::get(VecTy->getVectorElementType(),
VecTy->getVectorNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr, VecTy->getVectorElementType()->getPointerTo(
LI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
Type *Tys[] = {VecTy, Int8Ptr};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
Function *VldnFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(
BaseAddr, VecTy->getVectorNumElements() * Factor);
SmallVector<Value *, 2> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
// Replace uses of each shufflevector with the corresponding vector loaded
// by ldN.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SV = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(VldN, Index);
// Convert the integer vector to a pointer vector if the element type is a
// pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, VectorType::get(SV->getType()->getVectorElementType(),
VecTy->getVectorNumElements()));
SubVecs[SV].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
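// A sketch of the NumLoads > 1 case above (illustrative, extending the
// Factor = 2 example in the doc comment): with <8 x i32> shuffle results,
// VecTy is 256 bits and needs two accesses, so it is reset to <4 x i32> and
// the loop emits two vld2 calls of { <4 x i32>, <4 x i32> }, the second at
// an offset of 4 * 2 = 8 elements; the two halves for each shufflevector
// are then concatenated into the final <8 x i32> values.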
/// \brief Lower an interleaved store into a vstN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vst3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
VectorType *VecTy = SVI->getType();
assert(VecTy->getVectorNumElements() % Factor == 0 &&
"Invalid interleaved store");
unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long
// as the vector size is divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
// Convert to the corresponding integer vector.
Type *IntVecTy =
VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = VectorType::get(IntTy, LaneLen);
}
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
SI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
auto Mask = SVI->getShuffleMask();
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Type *Tys[] = {Int8Ptr, SubVecTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
// If we're generating more than one store, compute the base address of
// subsequent stores as an offset from the previous one.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
Function *VstNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++) {
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: If all elements in a chunk are undef, StartMask = 0!
// Note: Filling undef gaps with arbitrary elements is OK, since those
// elements were being written anyway (with undefs).
// In the all-undef case we default to using elements starting from 0.
// Note: StartMask cannot be negative; that is checked in
// isReInterleaveMask.
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
}
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
}
return true;
}
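// As with the interleaved-load path, a sketch of the NumStores > 1 case
// (illustrative): storing a <16 x i32> interleave with Factor = 2 gives
// LaneLen = 8 and an <8 x i32> sub-vector type that needs two accesses, so
// LaneLen drops to 4 and the loop emits two vst2 calls on <4 x i32>
// sub-shuffles, stepping BaseAddr by LaneLen * Factor = 8 elements between
// them.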
enum HABaseType {
HA_UNKNOWN = 0,
HA_FLOAT,
HA_DOUBLE,
HA_VECT64,
HA_VECT128
};
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
uint64_t &Members) {
if (auto *ST = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0; i < ST->getNumElements(); ++i) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
return false;
Members += SubMembers;
}
} else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
return false;
Members += SubMembers * AT->getNumElements();
} else if (Ty->isFloatTy()) {
if (Base != HA_UNKNOWN && Base != HA_FLOAT)
return false;
Members = 1;
Base = HA_FLOAT;
} else if (Ty->isDoubleTy()) {
if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
return false;
Members = 1;
Base = HA_DOUBLE;
} else if (auto *VT = dyn_cast<VectorType>(Ty)) {
Members = 1;
switch (Base) {
case HA_FLOAT:
case HA_DOUBLE:
return false;
case HA_VECT64:
return VT->getBitWidth() == 64;
case HA_VECT128:
return VT->getBitWidth() == 128;
case HA_UNKNOWN:
switch (VT->getBitWidth()) {
case 64:
Base = HA_VECT64;
return true;
case 128:
Base = HA_VECT128;
return true;
default:
return false;
}
}
}
return (Members > 0 && Members <= 4);
}
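// Illustrative examples of the recursion above: struct { float x, y, z; }
// yields Base == HA_FLOAT with Members == 3, a valid HA; double[4] yields
// HA_DOUBLE with four members; struct { float f; double d; } fails because
// the base types conflict; and float[5] fails the Members <= 4 bound.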
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
/// passing according to AAPCS rules.
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
CallingConv::ARM_AAPCS_VFP)
return false;
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
return IsHA || IsIntArray;
}
unsigned ARMTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
}
unsigned ARMTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
}
void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in ARMFunctionInfo.
ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
AFI->setIsSplitCSR(true);
}
void ARMTargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (ARM::GPRRegClass.contains(*I))
RC = &ARM::GPRRegClass;
else if (ARM::DPRRegClass.contains(*I))
RC = &ARM::DPRRegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
unsigned NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction()->hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
Index: head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 322855)
@@ -1,36742 +1,36749 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cctype>
#include <numeric>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ExperimentalVectorWideningLegalization(
"x86-experimental-vector-widening-legalization", cl::init(false),
cl::desc("Enable an experimental vector type legalization through widening "
"rather than promotion."),
cl::Hidden);
static cl::opt<int> ExperimentalPrefLoopAlignment(
"x86-experimental-pref-loop-alignment", cl::init(4),
cl::desc("Sets the preferable loop alignment for experiments "
"(the last x86-experimental-pref-loop-alignment bits"
" of the loop header PC will be 0)."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
"SHIFT, LEA, etc."),
cl::Hidden);
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
const char *Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc()));
}
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
X86ScalarSSEf64 = Subtarget.hasSSE2();
X86ScalarSSEf32 = Subtarget.hasSSE1();
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget.isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget.is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides and use cheaper ones.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
addBypassSlowDiv(64, 32);
}
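// For example (a note on the hook, not code from this file),
// addBypassSlowDiv(32, 8) asks codegen to test at run time whether both
// operands of a 32-bit division fit in 8 bits and, if so, use the much
// cheaper 8-bit divide; the 64-to-32 bypass works the same way on 64-bit
// targets with slow 64-bit divides.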
if (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
}
if (Subtarget.isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
} else if (Subtarget.isTargetWindowsGNU()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget.is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
// f32/f64 are legal, f80 is custom.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!Subtarget.useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// SSE has no i16 to fp conversion, only i32.
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
} else {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
} else if (!Subtarget.is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
//
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
}
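// Illustrative sketch of the CSE described above (invented IR): a pair like
//   %q = sdiv i32 %x, %y
//   %r = srem i32 %x, %y
// becomes a single two-result SDIVREM node, whose results map onto the
// quotient and remainder that one x86 idiv instruction already produces.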
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them up to i32, which has a shorter
// encoding.
setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
if (!Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
}
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
if (Subtarget.useSoftFloat() ||
(!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
// handling here, but rather a light-weight setjmp/longjmp replacement used
// to support continuations, user-level threading, and so on. As a result,
// no other SjLj exception interfaces are implemented; please don't build
// your own exception handling on top of them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::ConstantPool , VT, Custom);
setOperationAction(ISD::JumpTable , VT, Custom);
setOperationAction(ISD::GlobalAddress , VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
setOperationAction(ISD::SRA_PARTS, VT, Custom);
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
if (Subtarget.hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
!Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG, VT, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
// Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
} else if (UseX87 && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
addRegisterClass(MVT::f128, &X86::FR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
}
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
// N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
// types, we have to deal with them whether we ask for Expansion or not.
// Setting Expand causes its own optimisation problems though, so leave
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
// split/scalarized right now.
if (VT.getVectorElementType() == MVT::f16)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// We support custom legalizing of sext and anyext loads for specific
// memory vector types which we can load as a scalar (or sequence of
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
}
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
}
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
// i8 vectors are custom because the source register and source memory
// operand types are not the same width.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::ROTL, VT, Custom);
// XOP can efficiently perform BITREVERSE with VPPERM.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
bool HasInt256 = Subtarget.hasInt256();
addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
}
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::FMA, VT, Legal);
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
if (HasInt256) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256-bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
// Custom lower several nodes for 256-bit types.
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
}
for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
setTruncStoreAction(VT, MaskVT, Custom);
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
} else {
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
}
if (Subtarget.hasVLX()) {
// Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
}
}
if (Subtarget.hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
// FIXME: These instructions are available on SSE/AVX2, add relevant patterns.
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
}
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
MVT::v8i64}) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// Need to promote to 64-bit even though we have 32-bit masked instructions
// because the IR optimizers rearrange bitcasts around logic ops leaving
// too many variations to handle if we don't promote them.
setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
MVT::v4i64, MVT::v8i64}) {
setOperationAction(ISD::CTLZ, VT, Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
if (Subtarget.hasDQI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasVPOPCNTDQ()) {
// VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
// version of popcntd/q.
for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
MVT::v4i32, MVT::v2i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Custom under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64, MVT::v1i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Legal);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
}
}// has AVX-512
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
setOperationAction(ISD::ADD, MVT::v32i1, Custom);
setOperationAction(ISD::ADD, MVT::v64i1, Custom);
setOperationAction(ISD::SUB, MVT::v32i1, Custom);
setOperationAction(ISD::SUB, MVT::v64i1, Custom);
setOperationAction(ISD::MUL, MVT::v32i1, Custom);
setOperationAction(ISD::MUL, MVT::v64i1, Custom);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
if (Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Action);
setOperationAction(ISD::MSTORE, VT, Action);
}
if (Subtarget.hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
if (Subtarget.hasVLX()) {
// FIXME: These instructions are available on SSE/AVX2, add relevant patterns.
setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
}
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
}
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
}
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
// Combine sin / cos into one node or libcall if possible.
if (Subtarget.hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget.isTargetDarwin()) {
// For MacOSX, we don't want the normal expansion of a libcall to sincos.
// We want to issue a libcall to __sincos_stret to avoid memory traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
}
// On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64 bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FSIN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
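// A hedged sketch of what that Promote amounts to for one call site (the
// exact lowering is produced by the generic legalizer and may differ in
// detail):
//
//   float r = fmodf(x, y);                       // source
//   float r = (float)fmod((double)x, (double)y); // after promotion
//
// which mirrors the inline fmodf wrapper in the MSVC CRT headers.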
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::MGATHER);
computeRegisterProperties(Subtarget.getRegisterInfo());
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
// TODO: These control memcmp expansion in CGP and could be raised higher, but
// that needs to be benchmarked and balanced with the potential use of vector
// load/store types (PR33329, PR33914).
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
verifyIntrinsicTables();
}
// This has so far only been implemented for 64-bit MachO.
bool X86TargetLowering::useLoadStackGuardNode() const {
return Subtarget.isTargetMachO() && Subtarget.is64Bit();
}
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(EVT VT) const {
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
VT.getVectorElementType().getSimpleVT() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
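// For example, with ExperimentalVectorWideningLegalization enabled on an
// SSE2 target, an illegal MVT::v2i16 is widened (eventually to v8i16)
// instead of being promoted to v2i32, while single-element and vXi1 types
// keep the default action. A minimal sketch of querying this, assuming TLI
// is this lowering object:
//
//   // if (TLI.getPreferredVectorAction(EVT(MVT::v2i16)) ==
//   //     TargetLoweringBase::TypeWidenVector) { ... }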
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
if (!VT.isVector())
return MVT::i8;
if (VT.isSimple()) {
MVT VVT = VT.getSimpleVT();
const unsigned NumElts = VVT.getVectorNumElements();
MVT EltVT = VVT.getVectorElementType();
if (VVT.is512BitVector()) {
if (Subtarget.hasAVX512())
if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
}
if (Subtarget.hasBWI())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 32: return MVT::v32i1;
case 64: return MVT::v64i1;
}
}
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return MVT::getVectorVT(MVT::i1, NumElts);
if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
EVT LegalVT = getTypeToTransformTo(Context, VT);
EltVT = LegalVT.getVectorElementType().getSimpleVT();
}
if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
switch(NumElts) {
case 2: return MVT::v2i1;
case 4: return MVT::v4i1;
case 8: return MVT::v8i1;
}
}
return VT.changeVectorElementTypeToInteger();
}
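// Worked examples of the rules above, under the stated features:
//   AVX-512,       setcc on v16f32 -> MVT::v16i1
//   AVX512BW,      setcc on v32i16 -> MVT::v32i1
//   AVX-512 + VLX, setcc on v4i32  -> MVT::v4i1
//   plain AVX2,    setcc on v8f32  -> v8i32 (integer of element width)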
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
}
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget.hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
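// A worked example, assuming a 32-bit target with SSE enabled:
//
//   // struct S { int a; __m128 v; };  // contains a 128-bit vector member
//   // getByValTypeAlignment(S) == 16  // vector member forces 16 bytes
//   // struct T { int a; double d; };
//   // getByValTypeAlignment(T) == 4   // no SSE vector, default 4 bytes
//
// On 64-bit targets the result is simply max(8, ABI alignment of Ty).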
/// Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against the alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Size >= 32 && Subtarget.hasAVX()) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
// choose an optimal type with a vector element larger than a byte,
// getMemsetStores() may create an intermediate splat (using an integer
// multiply) before we splat as a vector.
return MVT::v32i8;
}
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
if (Subtarget.hasSSE1())
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
// Also, do not use f64 to lower memset unless this is a memset of zeros.
// The gymnastics of splatting a byte value into an XMM register and then
// only using 8-byte stores (because this is a CPU with slow unaligned
// 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
if (Subtarget.is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
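// Illustrative outcomes of the logic above (assuming NoImplicitFloat is not
// set and unaligned 16-byte access is fast or the operands are aligned):
//   memset of 64 bytes with AVX            -> MVT::v32i8
//   memcpy of 32 bytes with SSE2           -> MVT::v16i8
//   zero-memset of 8 bytes, 32-bit + SSE2  -> MVT::f64
//   otherwise, 64-bit target and Size >= 8 -> MVT::i64, else MVT::i32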
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
return X86ScalarSSEf32;
else if (VT == MVT::f64)
return X86ScalarSSEf64;
return true;
}
bool
X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
*Fast = true;
break;
case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();
break;
case 256:
*Fast = !Subtarget.isUnalignedMem32Slow();
break;
// TODO: What about AVX-512 (512-bit) accesses?
}
}
// Misaligned accesses of any size are always allowed.
return true;
}
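// Usage sketch: callers generally probe both legality and speed, e.g.
//
//   // bool Fast = false;
//   // if (TLI.allowsMisalignedMemoryAccesses(MVT::v8f32, AddrSpace, Align,
//   //                                        &Fast) && Fast)
//   //   ... emit one unaligned 32-byte load ...
//
// On x86 the call itself always succeeds; only *Fast varies by subtarget.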
/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (isPositionIndependent() && Subtarget.isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
ArgListTy &Args) const {
// Only relabel X86-32 for C / Stdcall CCs.
if (Subtarget.is64Bit())
return;
if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
return;
unsigned ParamRegs = 0;
if (auto *M = MF->getFunction()->getParent())
ParamRegs = M->getNumberRegisterParameters();
// Mark the first N integer arguments as being passed in registers.
for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
Type *T = Args[Idx].Ty;
if (T->isPointerTy() || T->isIntegerTy())
if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
unsigned numRegs = 1;
if (MF->getDataLayout().getTypeAllocSize(T) > 4)
numRegs = 2;
if (ParamRegs < numRegs)
return;
ParamRegs -= numRegs;
Args[Idx].IsInReg = true;
}
}
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid, MCContext &Ctx) const {
assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget.isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
case MVT::v8f32: case MVT::v4f64:
case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
case MVT::v16f32: case MVT::v8f64:
RRC = &X86::VR128XRegClass;
break;
}
return std::make_pair(RRC, Cost);
}
unsigned X86TargetLowering::getAddressSpace() const {
if (Subtarget.is64Bit())
return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
return 256;
}
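// Address spaces 256 and 257 are the x86 backend's conventions for
// %gs-relative and %fs-relative addressing. A hedged sketch of what a
// guard-slot pointer looks like at the IR level on x86-64 (non-kernel):
//
//   // i8* addrspace(257)* ; i.e. a pointer dereferenced relative to %fs
//
// The kernel code model swaps to %gs, hence the 256 result above.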
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
(TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}
static Constant* SegmentOffset(IRBuilder<> &IRB,
unsigned Offset, unsigned AddressSpace) {
return ConstantExpr::getIntToPtr(
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
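// For instance, SegmentOffset(IRB, 0x28, 257) produces roughly
//
//   // inttoptr (i32 40 to i8* addrspace(257)*)
//
// a pointer-to-pointer at %fs:0x28 on x86-64 (a sketch; the exact printed
// IR spelling can vary between LLVM versions).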
Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
// glibc, bionic, and Fuchsia have a special slot for the stack guard in
// tcbhead_t; use it instead of the usual global variable (see
// sysdeps/{i386,x86_64}/nptl/tls.h)
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
if (Subtarget.isTargetFuchsia()) {
// <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
return SegmentOffset(IRB, 0x10, getAddressSpace());
} else {
// %fs:0x28, unless we're using a Kernel code model, in which case
// it's %gs:0x28. gs:0x14 on i386.
unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
return SegmentOffset(IRB, Offset, getAddressSpace());
}
}
return TargetLowering::getIRStackGuard(IRB);
}
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// The MSVC CRT provides functionality for stack protection.
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
auto *SecurityCheckCookie = cast<Function>(
M.getOrInsertFunction("__security_check_cookie",
Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext())));
SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
return;
}
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget.getTargetTriple().isOSMSVCRT())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget.getTargetTriple().isOSMSVCRT())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (Subtarget.getTargetTriple().isOSContiki())
return getDefaultSafeStackPointerLocation(IRB, false);
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget.isTargetAndroid()) {
// %fs:0x48, unless we're using a Kernel code model, in which case it's
// %gs:0x48; %gs:0x24 on i386.
unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
return SegmentOffset(IRB, Offset, getAddressSpace());
}
// Fuchsia is similar.
if (Subtarget.isTargetFuchsia()) {
// <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
return SegmentOffset(IRB, 0x18, getAddressSpace());
}
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
return SrcAS < 256 && DestAS < 256;
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
bool X86TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
/// Lowers mask values (v*i1) to the corresponding register values.
/// \returns the DAG node after lowering to the register type.
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two stage lowering might be required
// bitcast: v8i1 -> i8 / v16i1 -> i16
// anyextend: i8 -> i32 / i16 -> i32
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
if (ValLoc == MVT::i32)
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
return ValToCopy;
} else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
// One stage lowering is required
// bitcast: v32i1 -> i32 / v64i1 -> i64
return DAG.getBitcast(ValLoc, ValArg);
} else
return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
}
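// A sketch of the two-stage case described above, for ValVT == v8i1 and
// ValLoc == i32:
//
//   // SDValue V = lowerMasksToReg(Mask /*v8i1*/, MVT::i32, Dl, DAG);
//   // => bitcast v8i1 -> i8, then any_extend i8 -> i32
//
// whereas v32i1 with an i32 location collapses to a single bitcast.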
/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
static void Passv64i1ArgInRegs(
const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
"Expected AVX512BW or AVX512BMI target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The value should reside in two registers");
// Before splitting the value we cast it to i64
Arg = DAG.getBitcast(MVT::i64, Arg);
// Splitting the value into two i32 types
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(0, Dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(1, Dl, MVT::i32));
// Attach the two i32 values to the corresponding registers.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
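// In effect a v64i1 argument on a 32-bit AVX512BW target travels as
//
//   // v64i1 -> bitcast to i64 -> extract_element 0 / extract_element 1
//   // -> two i32 halves in VA.getLocReg() and NextVA.getLocReg()
//
// and getv64i1Argument() further down performs the inverse on the receiving
// side.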
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
// In some cases we need to disable registers from the default CSR list.
// For example, when they are used for argument passing.
bool ShouldDisableCalleeSavedRegister =
CallConv == CallingConv::X86_RegCall ||
MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
if (CallConv == CallingConv::X86_INTR && !Outs.empty())
report_fatal_error("X86 interrupts may not return any value");
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
MVT::i32));
// Copy the result values into the output registers.
for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// Add the register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
// Promote values to the appropriate types.
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
}
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
} else if (ValVT == MVT::f64 &&
(Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::FP0 ||
VA.getLocReg() == X86::FP1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget.is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget.hasSSE2())
ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
}
}
}
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
Subtarget);
assert(2 == RegsToPass.size() &&
"Expecting two registers after Pass64BitArgInRegs");
// Add the second register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
// Add nodes to the DAG and add the values into the RetOps list
for (auto &Reg : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
}
}
// Swift calling convention does not require we copy the sret argument
// into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
//
// Checking Function.hasStructRetAttr() here is insufficient because the IR
// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
// When we have both sret and another return value, we should use the
// original Chain stored in RetOps[0], instead of the current Chain updated
// in the above loop. If we only have sret, RetOps[0] equals Chain.
// For the case of sret and another return value, we have
// Chain_0 at the function entry
// Chain_1 = getCopyToReg(Chain_0) in the above loop
// If we use Chain_1 in getCopyFromReg, we will have
// Val = getCopyFromReg(Chain_1)
// Chain_2 = getCopyToReg(Chain_1, Val) from below
// getCopyToReg(Chain_0) will be glued together with
// getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
// in Unit B, and we will have cyclic dependency between Unit A and Unit B:
// Data dependency from Unit B to Unit A due to usage of Val in
// getCopyToReg(Chain_1, Val)
// Chain dependency from Unit A to Unit B
// So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
// Add the returned register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (X86::GR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
X86ISD::NodeType opcode = X86ISD::RET_FLAG;
if (CallConv == CallingConv::X86_INTR)
opcode = X86ISD::IRET;
return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
// If we are returning more than one value, we can definitely
// not make a tail call; see PR19530.
if (UI->getNumOperands() > 4)
return false;
if (UI->getNumOperands() == 4 &&
UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT = MVT::i32;
bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
// The ABI does not require i1, i8 or i16 to be extended.
//
// On Darwin, there is code in the wild relying on Clang's old behaviour of
// always extending i8/i16 return values, so keep doing that for now.
// (PR26665).
ReturnMVT = MVT::i8;
}
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
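// Concrete consequences of the rule above:
//   i8 return, non-Darwin: ReturnMVT = i8, so the value is not extended.
//   i8 return, Darwin:     ReturnMVT stays i32, so it is extended to i32
//                          (the legacy behaviour kept for PR26665).
//   i1 return, anywhere:   extended only to i8.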
/// Reads two 32 bit registers and creates a 64 bit mask value.
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InFlag Represents the SDValue in the parent DAG node for
///                        glue purposes. In case the DAG is already using a
///                        physical register instead of a virtual one, we
///                        should glue our new SDValue to the InFlag SDValue.
/// \return a new 64-bit SDValue.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
const SDLoc &Dl, const X86Subtarget &Subtarget,
SDValue *InFlag = nullptr) {
assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type");
assert(NextVA.getValVT() == VA.getValVT() &&
"The locations should have the same type");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The values should reside in two registers");
SDValue Lo, Hi;
unsigned Reg;
SDValue ArgValueLo, ArgValueHi;
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterClass *RC = &X86::GR32RegClass;
// Read a 32 bit value from the registers
if (nullptr == InFlag) {
// When no physical register is present,
// create an intermediate virtual register
Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
} else {
// When a physical register is available read the value from it and glue
// the reads together.
ArgValueLo =
DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueLo.getValue(2);
ArgValueHi =
DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueHi.getValue(2);
}
// Convert the lower i32 value into a v32i1 mask
Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
// Convert the upper i32 value into a v32i1 mask
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
// Concatenate the two halves into the full v64i1 mask
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
/// Lower a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to mask type.
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
const EVT &ValLoc, const SDLoc &Dl,
SelectionDAG &DAG) {
SDValue ValReturned = ValArg;
if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
if (ValVT == MVT::v64i1) {
// On a 32 bit target, this case is handled by getv64i1Argument
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
// On a 64 bit target, there is no need to truncate the value, only bitcast it
} else {
MVT maskLen;
switch (ValVT.getSimpleVT().SimpleTy) {
case MVT::v8i1:
maskLen = MVT::i8;
break;
case MVT::v16i1:
maskLen = MVT::i16;
break;
case MVT::v32i1:
maskLen = MVT::i32;
break;
default:
llvm_unreachable("Expecting a vector of i1 types");
}
ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
}
return DAG.getBitcast(ValVT, ValReturned);
}
/// Lower the result values of a call into the
/// appropriate copies out of the physical registers they were returned in.
///
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
uint32_t *RegMask) const {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget.is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
++I, ++InsIndex) {
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
// In some calling conventions we need to remove the used registers
// from the register mask.
if (RegMask) {
for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
}
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
bool RoundAfterCopy = false;
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
if (!Subtarget.hasX87())
report_fatal_error("X87 register return with X87 disabled");
CopyVT = MVT::f80;
RoundAfterCopy = (CopyVT != VA.getLocVT());
}
SDValue Val;
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Val =
getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
.getValue(1);
Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
}
if (RoundAfterCopy)
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1, dl));
if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
if (VA.getValVT().isVector() &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
} else
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
}
InVals.push_back(Val);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// The StdCall calling convention is standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack instead of the caller, and symbols are
// decorated in a special way. It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
enum StructReturnType {
NotStructReturn,
RegStructReturn,
StackStructReturn
};
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
if (Outs.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Determines whether a function uses struct return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
if (Ins.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Make a copy of an aggregate at the address specified by "Src" to the
/// address "Dst", with size and alignment information specified by the byval
/// parameter attribute. The copy will be passed as a byval function parameter.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
/*isTailCall*/false,
MachinePointerInfo(), MachinePointerInfo());
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
CC == CallingConv::HHVM);
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
// C calling conventions:
case CallingConv::C:
case CallingConv::Win64:
case CallingConv::X86_64_SysV:
// Callee pop conventions:
case CallingConv::X86_ThisCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_VectorCall:
case CallingConv::X86_FastCall:
return true;
default:
return canGuaranteeTCO(CC);
}
}
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
ImmutableCallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
return true;
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo &MFI, unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = shouldGuaranteeTCO(
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If the value is passed by pointer, we have the address passed instead of
// the value itself. No need to extend if the mask value and location share
// the same absolute size.
bool ExtendedInMem =
VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// Calculate the SP offset of an interrupt parameter, re-arranging the slot
// normally taken by a return address.
int Offset = 0;
if (CallConv == CallingConv::X86_INTR) {
// X86 interrupts may take one or two arguments.
// Unlike a regular call, there is no return address on the stack.
// The offset of the last argument needs to be set to -4/-8 bytes.
// The offset of the first argument (when there are two) should be set to 0 bytes.
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
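// The expression above yields 0 for the first of two arguments and -1 slot
// (-4/-8 bytes) for the last argument, matching the layout described above.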
if (Subtarget.is64Bit() && Ins.size() == 2) {
// The stack pointer needs to be realigned for 64 bit handlers with error
// code, so the argument offset changes by 8 bytes.
Offset += 8;
}
}
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In the case of tail call optimization, mark all arguments mutable, since
// they could be overwritten by the lowering of arguments of a tail call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
return DAG.getFrameIndex(FI, PtrVT);
}
// This is an argument in memory. We might be able to perform copy elision.
if (Flags.isCopyElisionCandidate()) {
EVT ArgVT = Ins[i].ArgVT;
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
// create a stack object for the entire argument value type and return a
// load from our portion of it. This assumes that if the first part of an
// argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
/*Immutable=*/false);
PartAddr = DAG.getFrameIndex(FI, PtrVT);
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
// This is not the first piece of an argument in memory. See if there is
// already a fixed stack object including this offset. If so, assume it
// was created by the PartOffset == 0 branch above and create a load from
// the appropriate offset into it.
int64_t PartBegin = VA.getLocMemOffset();
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
int FI = MFI.getObjectIndexBegin();
for (; MFI.isFixedObjectIndex(FI); ++FI) {
int64_t ObjBegin = MFI.getObjectOffset(FI);
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
break;
}
if (MFI.isFixedObjectIndex(FI)) {
SDValue Addr =
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
return DAG.getLoad(
ValVT, dl, Chain, Addr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
Ins[i].PartOffset));
}
}
}
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
VA.getLocMemOffset(), isImmutable);
// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
MFI.setObjectSExt(FI, true);
}
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
return ExtendedInMem
? (VA.getValVT().isVector()
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
: Val;
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
static const MCPhysReg GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
}
static const MCPhysReg GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
// The XMM registers which might contain vararg parameters are shadowed by
// their paired GPRs, so we only need to save the GPRs to their home
// slots.
// TODO: __vectorcall will change this.
return None;
}
const Function *Fn = MF.getFunction();
bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
bool isSoftFloat = Subtarget.useSoftFloat();
assert(!(isSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
// registers.
return None;
static const MCPhysReg XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
#ifndef NDEBUG
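// Debug-only helper: verifies that argument locations are sorted by value
// number, an invariant the lowering loops below rely on.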
static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
[](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
#endif
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Function *Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget.isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo &MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
assert(
!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
if (CallConv == CallingConv::X86_INTR) {
bool isLegal = Ins.size() == 1 ||
(Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
(!Is64Bit && Ins[1].VT == MVT::i32)));
if (!isLegal)
report_fatal_error("X86 interrupts may take one or two arguments");
}
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
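// The Win64 ABI reserves 32 bytes above the return address as home space
// for the four register arguments (RCX, RDX, R8 and R9).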
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeArguments(Ins, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
}
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
SDValue ArgValue;
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++InsIndex) {
assert(InsIndex < Ins.size() && "Invalid Ins index");
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
if (VA.needsCustom()) {
assert(
VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// In the regcall calling convention on 32 bit targets, v64i1
// values are split up into two registers.
ArgValue =
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
} else {
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
else if (RegVT == MVT::f80)
RC = &X86::RFP80RegClass;
else if (RegVT == MVT::f128)
RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
else if (RegVT.is128BitVector())
RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
else if (RegVT == MVT::v1i1)
RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
else if (RegVT == MVT::v32i1)
RC = &X86::VK32RegClass;
else if (RegVT == MVT::v64i1)
RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else if (VA.getValVT().isVector() &&
VA.getValVT().getScalarType() == MVT::i1 &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue =
LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
}
// If the value is passed via a pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
InVals.push_back(ArgValue);
}
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
// The Swift calling convention does not require us to copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
if (CallConv == CallingConv::Swift)
continue;
// All x86 ABIs require that for returning structs by value we copy the
// sret argument into %rax/%eax (depending on ABI) for the return. Save
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
}
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes a variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start. We
// can skip this if there are no va_start calls.
if (MFI.hasVAStart() &&
(Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
}
// Figure out if XMM registers are in use.
assert(!(Subtarget.useSoftFloat() &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
// 64-bit calling conventions support varargs and register parameters, so we
// have to do extra work to spill them in the prologue.
if (Is64Bit && isVarArg && MFI.hasVAStart()) {
// Find the first unallocated GPR and XMM argument registers.
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
// Gather all the live in physical registers.
SmallVector<SDValue, 6> LiveGPRs;
SmallVector<SDValue, 8> LiveXMMRegs;
SDValue ALVal;
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
LiveGPRs.push_back(
DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
}
if (!ArgXMMs.empty()) {
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
LiveXMMRegs.push_back(
DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
}
}
if (IsWin64) {
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so
// they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
}
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy(DAG.getDataLayout()));
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(),
FuncInfo->getRegSaveFrameIndex(), Offset));
MemOps.push_back(Store);
Offset += 8;
}
if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex(), dl));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset(), dl));
SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
LiveXMMRegs.end());
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other, SaveXMMOps));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
if (Subtarget.hasAVX512() &&
(Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
else if (Subtarget.hasAVX())
VecVT = MVT::v8f32;
else if (Subtarget.hasSSE2())
VecVT = MVT::v4f32;
// We forward some GPRs and some vector types.
SmallVector<MVT, 2> RegParmTypes;
MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
RegParmTypes.push_back(IntVT);
if (VecVT != MVT::Other)
RegParmTypes.push_back(VecVT);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
// Conservatively forward AL on x86_64, since it might be used for varargs.
if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
}
// Copy all forwards from physical to virtual registers.
for (ForwardedRegister &F : Forwards) {
// FIXME: Can we use a less constrained schedule?
SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
}
}
// Some CCs need callee pop.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
// X86 interrupts must pop the error code (and the alignment padding) if
// present.
FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget.getTargetTriple().isOSMSVCRT() &&
argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
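// 0xAAAAAAA is a deliberately bogus sentinel; it should never be used as a
// real frame index on 32-bit targets.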
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
if (Personality == EHPersonality::CoreCLR) {
assert(Is64Bit);
// TODO: Add a mechanism to frame lowering that will allow us to indicate
// that we'd prefer this slot be allocated towards the bottom of the frame
// (i.e. near the stack pointer after allocating the frame). Every
// funclet needs a copy of this slot in its (mostly empty) frame, and the
// offset from the bottom of this and each funclet's frame must be the
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
if (CallConv == CallingConv::X86_RegCall ||
Fn->hasFnAttribute("no_caller_saved_registers")) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
}
return Chain;
}
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff, const SDLoc &dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy(DAG.getDataLayout());
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
return SDValue(OutRetAddr.getNode(), 1);
}
/// Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
EVT PtrVT, unsigned SlotSize,
int FPDiff, const SDLoc &dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int NewReturnAddrFI =
MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), NewReturnAddrFI));
return Chain;
}
/// Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of the specified width.
static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
Mask.push_back(NumElems);
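// Mask indices >= NumElems select from V2, so lane 0 comes from V2 and the
// remaining lanes are taken unchanged from V1.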
for (unsigned i = 1; i != NumElems; ++i)
Mask.push_back(i);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
const CallInst *CI =
CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
(Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
if (Attr.getValueAsString() == "true")
isTailCall = false;
if (Subtarget.isPICStyleGOT() &&
!MF.getTarget().Options.GuaranteedTailCallOpt) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
// that requires lazy function symbol resolution. Using musttail or
// GuaranteedTailCallOpt will override this.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G || (!G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility()))
isTailCall = false;
}
bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
if (IsMustTail) {
// Force this to be a tail call. The verifier rules are enough to ensure
// that we can lower this successfully without moving the return address
// around.
isTailCall = true;
} else if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeArguments(Outs, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are already in place in the
// caller's own incoming argument area, set up by the caller's caller.
NumBytes = 0;
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall && !IsMustTail) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
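// FPDiff is negative when the callee needs more argument space than the
// caller provides, i.e. the return address must move down the stack.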
// Record the delta of movement of the return address stack slot, keeping
// the most negative (i.e. largest) adjustment seen so far.
if (FPDiff < X86Info->getTCReturnAddrDelta())
X86Info->setTCReturnAddrDelta(FPDiff);
}
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
// If we have an inalloca argument, all stack space has already been allocated
// for us and is right at the top of the stack. We don't support multiple
// arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
if (!ArgLocs.back().isMemLoc())
report_fatal_error("cannot use inalloca attribute on a register "
"parameter");
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
NumBytes - NumBytesToPush, dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca())
continue;
CCValAssign &VA = ArgLocs[I];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[OutIndex];
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
Arg.getValueType().getVectorElementType() == MVT::i1)
Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getBitcast(MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(
Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
Arg = SpillSlot;
break;
}
}
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// Split v64i1 value into two registers
Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
Subtarget);
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
if (isVarArg && IsWin64) {
// The Win64 ABI requires an argument XMM register to be copied to the
// corresponding shadow register if the callee is a varargs function.
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
if (Subtarget.isPICStyleGOT()) {
// ELF / PIC requires the GOT to be in the EBX register before function calls
// via the PLT GOT pointer.
if (!isTailCall) {
RegsToPass.push_back(std::make_pair(
unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code, load the
// address of the callee into ECX. The value in ECX is used as the target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as a hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget.hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
DAG.getConstant(NumXMMRegs, dl,
MVT::i8)));
}
if (isVarArg && IsMustTail) {
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
}
}
// For tail calls, lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
if (!IsSibcall && isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert((CallConv == CallingConv::X86_RegCall) &&
"Expecting custom case only in regcall calling convention");
// This means that we are in a special case where one argument was
// passed through two register locations - skip the next location.
++I;
}
continue;
}
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca arguments. They don't require any work.
if (Flags.isInAlloca())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore(
ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
getPointerTy(DAG.getDataLayout()),
RegInfo->getSlotSize(), FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (Callee->getOpcode() == ISD::GlobalAddress) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
// We should use an extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportStorageClass()) {
unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
Callee = DAG.getTargetGlobalAddress(
GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
if (OpFlags == X86II::MO_GOTPCREL) {
// Add a wrapper.
Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
getPointerTy(DAG.getDataLayout()), Callee);
// Add extra indirection
Callee = DAG.getLoad(
getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
unsigned char OpFlags =
Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
Callee = DAG.getTargetExternalSymbol(
S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
} else if (Subtarget.isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
// If HasNCSR is set (the no_caller_saved_registers attribute is present),
// we use the X86_INTR calling convention because it has the same CSR mask
// (same preserved registers).
const uint32_t *Mask = RegInfo->getCallPreservedMask(
MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
// If this is an invoke in a 32-bit function using a funclet-based
// personality, assume the function clobbers all registers. If an exception
// is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
const Function *CallerFn = MF.getFunction();
EHPersonality Pers =
CallerFn->hasPersonalityFn()
? classifyEHPersonality(CallerFn->getPersonalityFn())
: EHPersonality::Unknown;
if (isFuncletEHPersonality(Pers))
Mask = RegInfo->getNoPreservedMask();
}
// Define a new register mask from the existing mask.
uint32_t *RegMask = nullptr;
// In some calling conventions we need to remove the used physical registers
// from the reg mask.
if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Allocate a new Reg Mask and copy Mask.
RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
// Make sure all sub registers of the argument registers are reset
// in the RegMask.
for (auto const &RegPair : RegsToPass)
for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
// Create the RegMask Operand according to our updated mask.
Ops.push_back(DAG.getRegisterMask(RegMask));
} else {
// Create the RegMask Operand according to the static mask.
Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
// We used to do:
//// If this is the first return lowered for this function, add the regs
//// to the liveout set for the function.
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget.getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
// For MSVC Win32 targets, the caller pops the hidden struct pointer.
NumBytesForCalleeToPop = 4;
else
NumBytesForCalleeToPop = 0; // Callee pops nothing.
if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
// No need to reset the stack after the call if the call doesn't return. To
// keep the MI verifier happy, we'll pretend the callee does it for us.
NumBytesForCalleeToPop = NumBytes;
}
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals, RegMask);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like std call, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position independent code, only
// local (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's
// dyld, for example.)
// If the tail-called function (the callee) has more arguments than the
// caller, the caller needs to make sure that there is room to move the
// RETADDR to. This is achieved by reserving an area the size of the argument
// delta right after the original RETADDR, but before the saved frame pointer
// or the spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// Round the stack size up so that it is aligned, e.g. to 16n + 12 for a
/// 16-byte alignment requirement (leaving room for the return address slot).
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
unsigned SlotSize = RegInfo->getSlotSize();
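// We want (Offset + SlotSize) to be a multiple of StackAlignment, e.g.
// Offset % 16 == 12 for a 16-byte alignment with a 4-byte slot, so the stack
// is aligned again once the return address has been pushed.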
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// The misalignment is at most StackAlignment - SlotSize (e.g. 12), so just
// add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out the lower bits, then add the stack alignment once plus the
// remainder (e.g. 12 bytes).
Offset = ((~AlignMask) & Offset) + StackAlignment +
(StackAlignment-SlotSize);
}
return Offset;
}
/// Return true if the given stack call argument is already available in the
/// same (relative) position of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII, const CCValAssign &VA) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
for (;;) {
// Look through nodes that don't alter the bits of the incoming value.
unsigned Op = Arg.getOpcode();
if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
Arg = Arg.getOperand(0);
continue;
}
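// A truncate whose input is an AssertZext to the truncated type leaves the
// relevant bits untouched as well, so look through that pair too.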
if (Op == ISD::TRUNCATE) {
const SDValue &TruncInput = Arg.getOperand(0);
if (TruncInput.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
Arg.getValueType()) {
Arg = TruncInput.getOperand(0);
continue;
}
}
break;
}
int FI = INT_MAX;
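// INT_MAX is a placeholder; every path that reaches the assert below must
// have assigned a real frame index first.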
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
Opcode == X86::LEA64_32r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
FI = FINode->getIndex();
Bytes = Flags.getByValSize();
} else
return false;
assert(FI != INT_MAX);
if (!MFI.isFixedObjectIndex(FI))
return false;
if (Offset != MFI.getObjectOffset(FI))
return false;
// If this is not byval, check that the argument stack object is immutable.
// inalloca and argument copy elision can create mutable argument stack
// objects. Byval objects can be mutated, but a byval call intends to pass the
// mutated memory.
if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
return false;
if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
Flags.isSExt() != MFI.isObjectSExt(FI)) {
return false;
}
}
return Bytes == MFI.getObjectSize(FI);
}
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
// then the FP_EXTEND of the call result is not a nop. It's not safe to
// perform a tailcall optimization here.
if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
return false;
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
// space.
if (IsCalleeWin64 != IsCallerWin64)
return false;
if (DAG.getTarget().Options.GuaranteedTailCallOpt)
return canGuaranteeTCO(CalleeCC) && CCMatch;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->needsStackRealignment(MF))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If the call result is in ST0 / ST1, it needs to be popped off the x87
// stack. Therefore, if it's not used by the call it is not safe to optimize
// this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
unsigned StackArgsSize = 0;
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
// Allocate shadow area for Win64
if (IsCalleeWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
StackArgsSize = CCInfo.getNextStackOffset();
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII, VA))
return false;
}
}
}
bool PositionIndependent = isPositionIndependent();
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
PositionIndependent)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs = PositionIndependent ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == MaxInRegs)
return false;
break;
}
}
}
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}
bool CalleeWillPop =
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt);
if (unsigned BytesToPop =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
// If we have bytes to pop, the callee must pop them.
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
if (!CalleePopMatches)
return false;
} else if (CalleeWillPop && StackArgsSize > 0) {
// If we don't have bytes to pop, make sure the callee doesn't pop any.
return false;
}
return true;
}
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return X86::createFastISel(funcInfo, libInfo);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
static bool MayFoldLoad(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}
static bool MayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
static bool MayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
}
return false;
}
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VBROADCAST:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::VPERMIL2:
case X86ISD::VPERMI:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
case X86ISD::VZEXT_MOVL:
return true;
}
}
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
default: return false;
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
case X86ISD::VPERMIL2:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
case X86ISD::ANDNP:
return true;
}
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
-(int64_t)SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
if (!hasSymbolicDisplacement)
return true;
// FIXME: Some tweaks might be needed for medium code model.
if (M != CodeModel::Small && M != CodeModel::Kernel)
return false;
// For the small code model, we assume that the last object is 16MB before the
// end of the 31-bit boundary. We may also accept pretty large negative
// constants, knowing that all objects are in the positive half of the address
// space.
if (M == CodeModel::Small && Offset < 16*1024*1024)
return true;
// For the kernel code model we know that all objects reside in the negative
// half of the 32-bit address space. We must not accept negative offsets, since
// they may be just off, but we may accept pretty large positive ones.
if (M == CodeModel::Kernel && Offset >= 0)
return true;
return false;
}
/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
// If GuaranteeTCO is true, we force some calls to be callee pop so that we
// can guarantee TCO.
if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
return true;
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:
return !is64Bit;
}
}
/// \brief Return true if the condition is an unsigned comparison operation.
static bool isX86CCUnsigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
case X86::COND_E:
case X86::COND_NE:
case X86::COND_B:
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
return true;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
return false;
}
}
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
}
/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
bool isFP, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
}
}
return TranslateIntegerX86CC(SetCCOpcode);
}
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
}
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
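// For example, after the swap above SETOLT is evaluated as "Y > X", so it
// maps to the unsigned-above condition COND_A (marked 'flipped' below).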
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: // flipped
case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: // flipped
case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: // flipped
case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
}
/// Is there a floating point cmov for the specific X86 condition code?
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
if (!IntrData)
return false;
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.readMem = false;
Info.writeMem = false;
Info.vol = false;
Info.offset = 0;
switch (IntrData->Type) {
case EXPAND_FROM_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getType());
Info.align = 1;
Info.readMem = true;
break;
}
case COMPRESS_TO_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
Info.align = 1;
Info.writeMem = true;
break;
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
Info.ptrVal = I.getArgOperand(0);
MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
ScalarVT = MVT::i8;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
ScalarVT = MVT::i16;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
ScalarVT = MVT::i32;
Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
Info.align = 1;
Info.writeMem = true;
break;
}
default:
return false;
}
return true;
}
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
}
return false;
}
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
// relocation target a movq or addq instruction: don't let the load shrink.
SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
return true;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0 || BitSize > 64)
return false;
return true;
}
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
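// Extracts at index 0 (a subregister read), or at an offset of exactly one
// result width (e.g. the upper half of a double-width vector), are cheap.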
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
}
bool X86TargetLowering::isCheapToSpeculateCtlz() const {
// Speculate ctlz only if we can directly use LZCNT.
return Subtarget.hasLZCNT();
}
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
return true;
}
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
// There are only 32-bit and 64-bit forms for 'andn'.
EVT VT = Y.getValueType();
if (VT != MVT::i32 && VT != MVT::i64)
return false;
return true;
}
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
return VT;
// PMOVMSKB can handle this.
if (NumBits == 128 && isTypeLegal(MVT::v16i8))
return MVT::v16i8;
// VPMOVMSKB can handle this.
if (NumBits == 256 && isTypeLegal(MVT::v32i8))
return MVT::v32i8;
// TODO: Allow 64-bit type for 32-bit target.
// TODO: 512-bit types should be allowed, but make sure that those
// cases are handled in combineVectorSizedSetCCEquality().
return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
/// Val is the undef sentinel value or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
}
/// Val is either the undef or zero sentinel value.
static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
if (Mask[i] != SM_SentinelUndef)
return false;
return true;
}
/// Return true if Val is undef or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
}
/// Return true if every element in Mask is undef or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrInRange(ArrayRef<int> Mask,
int Low, int Hi) {
for (int M : Mask)
if (!isUndefOrInRange(M, Low, Hi))
return false;
return true;
}
/// Return true if Val is undef, zero or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
}
/// Return true if every element in Mask is undef, zero or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
for (int M : Mask)
if (!isUndefOrZeroOrInRange(M, Low, Hi))
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
unsigned Pos, unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef or zero.
static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef or is zero.
static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
if (!isUndefOrZero(Mask[i]))
return false;
return true;
}
/// \brief Helper function to test whether a shuffle mask could be
/// simplified by widening the elements being shuffled.
///
/// Writes the mask for wider elements to WidenedMask if valid. Otherwise
/// leaves it in an unspecified state.
///
/// NOTE: This must handle normal vector shuffle masks and *target* vector
/// shuffle masks. The latter have the special property of a '-2' representing
/// a zeroed lane of a vector.
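/// For example, <0,1,6,7> widens to <0,3>, while <0,2,1,3> cannot be
/// widened.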
static bool canWidenShuffleElements(ArrayRef<int> Mask,
SmallVectorImpl<int> &WidenedMask) {
WidenedMask.assign(Mask.size() / 2, 0);
for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
int M0 = Mask[i];
int M1 = Mask[i + 1];
// If both elements are undef, it's trivial.
if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
WidenedMask[i / 2] = SM_SentinelUndef;
continue;
}
// Check for an undef mask and a mask value properly aligned to fit with
// a pair of values. If we find such a case, use the non-undef mask's value.
if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
WidenedMask[i / 2] = M1 / 2;
continue;
}
if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// When zeroing, we need to spread the zeroing across both lanes to widen.
if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
(M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
WidenedMask[i / 2] = SM_SentinelZero;
continue;
}
return false;
}
// Finally check if the two mask values are adjacent and aligned with
// a pair.
if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// Otherwise we can't safely widen the elements used in this shuffle.
return false;
}
assert(WidenedMask.size() == Mask.size() / 2 &&
"Incorrect size of mask after widening the elements!");
return true;
}
/// Helper function to scale a shuffle or target shuffle mask, replacing each
/// mask index with the scaled sequential indices for an equivalent narrowed
/// mask. This is the reverse process to canWidenShuffleElements, but can always
/// succeed.
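/// e.g. scaling <1,u,0> (u == SM_SentinelUndef) by 2 gives <2,3,u,u,0,1>.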
static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
int NumElts = Mask.size();
ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
// Repeat sentinel values in every mask element.
if (M < 0) {
for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = M;
continue;
}
// Scale mask element and increment across each mask element.
for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = (Scale * M) + s;
}
}
/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
/// extract that is suitable for instructions that extract 128- or 256-bit
/// vectors.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index = N->getConstantOperandVal(1);
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getScalarSizeInBits();
return (Index * ElSize) % vecWidth == 0;
}
/// Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for the insertion
/// of 128- or 256-bit subvectors.
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index = N->getConstantOperandVal(2);
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getScalarSizeInBits();
return (Index * ElSize) % vecWidth == 0;
}
bool X86::isVINSERT128Index(SDNode *N) {
return isVINSERTIndex(N, 128);
}
bool X86::isVINSERT256Index(SDNode *N) {
return isVINSERTIndex(N, 256);
}
bool X86::isVEXTRACT128Index(SDNode *N) {
return isVEXTRACTIndex(N, 128);
}
bool X86::isVEXTRACT256Index(SDNode *N) {
return isVEXTRACTIndex(N, 256);
}
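// For example, extracting the upper 128-bit half of a v8i32 (element index
// 4, 4 elements per 128-bit chunk) yields the immediate 4 / 4 == 1.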
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT");
uint64_t Index = N->getConstantOperandVal(1);
MVT VecVT = N->getOperand(0).getSimpleValueType();
unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
return Index / NumElemsPerChunk;
}
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT");
uint64_t Index = N->getConstantOperandVal(2);
MVT VecVT = N->getSimpleValueType(0);
unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
return Index / NumElemsPerChunk;
}
/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VEXTRACTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 128);
}
/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VEXTRACTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 256);
}
/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 128);
}
/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return isNullConstant(Elt) || isNullFPConstant(Elt);
}
// Build a vector of constants.
// Use an UNDEF node if MaskElt == -1.
// Split 64-bit constants into 32-bit halves in 32-bit mode.
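// In the split case each 64-bit element is emitted as its low 32 bits
// followed by an explicit zero upper half, and the resulting <2*N x i32>
// vector is bitcast back to VT.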
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
const SDLoc &dl, bool IsMask = false) {
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(Values[i], dl, EltVT);
Ops.push_back(OpNode);
if (Split)
Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(0, dl, EltVT));
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
if (Split)
ConstsNode = DAG.getBitcast(VT, ConstsNode);
return ConstsNode;
}
static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Undefs[i]) {
Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
continue;
}
const APInt &V = Bits[i];
assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
if (Split) {
Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
} else if (EltVT == MVT::f32) {
APFloat FV(APFloat::IEEEsingle(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else if (EltVT == MVT::f64) {
APFloat FV(APFloat::IEEEdouble(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else {
Ops.push_back(DAG.getConstant(V, dl, EltVT));
}
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
return DAG.getBitcast(VT, ConstsNode);
}
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
VT.getVectorElementType() == MVT::i1) &&
"Unexpected vector type");
// Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
// type. This ensures they get CSE'd. But if the integer type is not
// available, use a floating-point +0.0 instead.
SDValue Vec;
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type");
assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type");
Vec = DAG.getConstant(0, dl, VT);
} else {
unsigned Num32BitElts = VT.getSizeInBits() / 32;
Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
}
return DAG.getBitcast(VT, Vec);
}
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
const SDLoc &dl, unsigned vectorWidth) {
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/vectorWidth;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2 we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(
ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert((Vec.getValueType().is256BitVector() ||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 128);
}
/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 256);
}
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl,
unsigned vectorWidth) {
assert((vectorWidth == 128 || vectorWidth == 256) &&
"Unsupported vector width");
// Inserting an UNDEF subvector is a no-op; return Result unchanged.
if (Vec.isUndef())
return Result;
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2 we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
/// Generate a DAG to put 128 bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
// Return true if the instruction zeroes the unused upper part of the
// destination and accepts a mask.
static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
switch (Opcode) {
default:
return false;
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
return true;
}
}
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
MVT OpVT = Op.getSimpleValueType();
MVT SubVecVT = SubVec.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
assert(IdxVal + SubVecNumElems <= NumElems &&
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
// There are 3 possible cases:
// 1. Subvector should be inserted in the lower part (IdxVal == 0)
// 2. Subvector should be inserted in the upper part
// (IdxVal + SubVecNumElems == NumElems)
// 3. Subvector should be inserted in the middle (for example v2i1
// to v16i1, index 2)
// If this node widens - by concatenating zeroes - the type of the result
// of a node whose instruction zeroes all upper (irrelevant) bits of the
// output register, mark this node as legal to enable replacing them with
// the v8i1 version of the previous instruction during instruction selection.
// For example, the VPCMPEQDZ128rr instruction stores its v4i1 result in a
// k-register while zeroing all the upper remaining 60 bits of the register.
// If the result of such an instruction is inserted into an all-zeros vector,
// then we can safely remove the insert_subvector (in instruction selection)
// as the cmp instruction already zeroed the rest of the register.
if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
(SubVec.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
return Op;
// extend to natively supported kshift
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
MVT WideOpVT = OpVT;
if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
WideOpVT = MinVT;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Undef = DAG.getUNDEF(WideOpVT);
SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, SubVec, ZeroIdx);
// Extract a subvector if required.
auto ExtractSubVec = [&](SDValue V) {
return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
OpVT, V, ZeroIdx);
};
if (Vec.isUndef()) {
if (IdxVal != 0) {
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
ShiftBits);
}
return ExtractSubVec(WideSubVec);
}
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(ShiftLeft, dl, MVT::i8));
Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
return ExtractSubVec(Vec);
}
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
getZeroVector(WideOpVT, Subtarget, DAG, dl),
SubVec, ZeroIdx);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
// Simple case: the subvector goes in the upper part.
if (IdxVal + SubVecNumElems == NumElems) {
// Zero upper bits of the Vec
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
// Subvector should be inserted in the middle - use shuffle
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
SubVec, ZeroIdx);
SmallVector<int, 64> Mask;
for (unsigned i = 0; i < NumElems; ++i)
Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
i : i + NumElems);
return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
}
/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
/// large BUILD_VECTORS.
static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
const SDLoc &dl) {
SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
}
static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
const SDLoc &dl) {
SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
}
/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
SelectionDAG &DAG) {
EVT InVT = In.getValueType();
assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
if (VT.is128BitVector() && InVT.is128BitVector())
return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
: DAG.getZeroExtendVectorInReg(In, DL, VT);
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
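// e.g. a VZEXT from v16i16 to v8i32 reads only the low v8i16 half of the
// input, so extract those 128 bits before creating the extend node.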
if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
std::max(128, (int)VT.getSizeInBits() / Scale));
}
return DAG.getNode(Opc, DL, VT, In);
}
/// Generate unpacklo/unpackhi shuffle mask.
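/// e.g. for v8i16, unpacklo yields <0,8,1,9,2,10,3,11> and unpackhi yields
/// <4,12,5,13,6,14,7,15>.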
static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
bool Unary) {
assert(Mask.empty() && "Expected an empty shuffle mask vector");
int NumElts = VT.getVectorNumElements();
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
for (int i = 0; i < NumElts; ++i) {
unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
int Pos = (i % NumEltsInLane) / 2 + LaneStart;
Pos += (Unary ? 0 : NumElts * (i % 2));
Pos += (Lo ? 0 : NumEltsInLane / 2);
Mask.push_back(Pos);
}
}
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Return a vector_shuffle of the specified vector and a zero or undef
/// vector.
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
bool IsZero,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
int NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec(NumElems);
for (int i = 0; i != NumElems; ++i)
// If this is the insertion idx, put the low elt of V2 here.
MaskVec[i] = (i == Idx) ? NumElems : i;
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
static SDValue peekThroughBitcasts(SDValue V) {
while (V.getNode() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
return V;
}
static SDValue peekThroughOneUseBitcasts(SDValue V) {
while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).hasOneUse())
V = V.getOperand(0);
return V;
}
static const Constant *getTargetConstantFromNode(SDValue Op) {
Op = peekThroughBitcasts(Op);
auto *Load = dyn_cast<LoadSDNode>(Op);
if (!Load)
return nullptr;
SDValue Ptr = Load->getBasePtr();
if (Ptr->getOpcode() == X86ISD::Wrapper ||
Ptr->getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr->getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry())
return nullptr;
return dyn_cast<Constant>(CNode->getConstVal());
}
// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt &UndefElts,
SmallVectorImpl<APInt> &EltBits,
bool AllowWholeUndefs = true,
bool AllowPartialUndefs = true) {
assert(EltBits.empty() && "Expected an empty EltBits vector");
Op = peekThroughBitcasts(Op);
EVT VT = Op.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
// Bitcast a source array of element bits to the target size.
auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
unsigned NumSrcElts = UndefSrcElts.getBitWidth();
unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
"Constant bit sizes don't match");
// Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
if (UndefSrcElts.getBoolValue() && !AllowUndefs)
return false;
// If we're already the right size, don't bother bitcasting.
if (NumSrcElts == NumElts) {
UndefElts = UndefSrcElts;
EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
return true;
}
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
unsigned BitOffset = i * SrcEltSizeInBits;
if (UndefSrcElts[i])
UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
MaskBits.insertBits(SrcEltBits[i], BitOffset);
}
// Split the undef/constant single bitset data into the target elements.
UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
for (unsigned i = 0; i != NumElts; ++i) {
unsigned BitOffset = i * EltSizeInBits;
APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
// Only treat an element as UNDEF if all bits are UNDEF.
if (UndefEltBits.isAllOnesValue()) {
if (!AllowWholeUndefs)
return false;
UndefElts.setBit(i);
continue;
}
// If only some bits are UNDEF then treat them as zero (or bail if not
// supported).
if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
return false;
APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
EltBits[i] = Bits.getZExtValue();
}
return true;
};
// Collect constant bits and insert into mask/undef bit masks.
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
unsigned UndefBitIndex) {
if (!Cst)
return false;
if (isa<UndefValue>(Cst)) {
Undefs.setBit(UndefBitIndex);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
Mask = CInt->getValue();
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
return false;
};
// Extract constant bits from build vector.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
const SDValue &Src = Op.getOperand(i);
if (Src.isUndef()) {
UndefSrcElts.setBit(i);
continue;
}
auto *Cst = cast<ConstantSDNode>(Src);
SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
}
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = CstTy->getVectorNumElements();
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0; i != NumSrcElts; ++i)
if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
UndefSrcElts, i))
return false;
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
if (UndefSrcElts[0])
UndefSrcElts.setBits(0, NumSrcElts);
SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
return CastBitData(UndefSrcElts, SrcEltBits);
}
}
}
// Extract a rematerialized scalar constant insertion.
if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
return false;
}
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
// Extract the raw target constant bits.
// FIXME: We currently don't support UNDEF bits or mask entries.
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
EltBits, /* AllowWholeUndefs */ false,
/* AllowPartialUndefs */ false))
return false;
// Insert the extracted elements into the mask.
for (APInt Elt : EltBits)
RawMask.push_back(Elt.getZExtValue());
return true;
}
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
/// Sets \p IsUnary to true if only one source is used. Note that this will set
/// IsUnary for shuffles which use a single input multiple times, and in those
/// cases it will adjust the mask to only have indices within that single input.
/// It is an error to call this with non-empty Mask/Ops vectors.
static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
IsUnary = false;
bool IsFakeUnary = false;
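// IsFakeUnary is set when both shuffle inputs are the same node; in that
// case the mask is remapped after the switch to reference only the first
// input.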
switch(N->getOpcode()) {
case X86ISD::BLENDI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::EXTRQI:
if (isa<ConstantSDNode>(N->getOperand(1)) &&
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
case X86ISD::INSERTQI:
if (isa<ConstantSDNode>(N->getOperand(2)) &&
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::PALIGNR:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(0));
break;
case X86ISD::VSHLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::VSRLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::VZEXT_MOVL:
DecodeZeroMoveLowMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::VBROADCAST: {
SDValue N0 = N->getOperand(0);
// See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
// add the pre-extracted value to the Ops vector.
if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N0.getOperand(0).getValueType() == VT &&
N0.getConstantOperandVal(1) == 0)
Ops.push_back(N0.getOperand(0));
// We only decode broadcasts of same-sized vectors, unless the broadcast
// came from an extract of a vector of the original width. If we found one,
// we pushed it to the Ops vector above.
if (N0.getValueType() == VT || !Ops.empty()) {
DecodeVectorBroadcast(VT, Mask);
IsUnary = true;
break;
}
return false;
}
case X86ISD::VPERMILPV: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMILPMask(VT, RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMILPMask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::PSHUFB: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
DecodePSHUFBMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodePSHUFBMask(C, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
break;
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVSHDUP:
DecodeMOVSHDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVDDUP:
DecodeMOVDDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
// Not yet implemented
return false;
case X86ISD::VPERMIL2: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
break;
}
}
return false;
}
case X86ISD::VPPERM: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
DecodeVPPERMMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPPERMMask(C, Mask);
break;
}
return false;
}
case X86ISD::VPERMV: {
IsUnary = true;
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
SmallVector<uint64_t, 32> RawMask;
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMVMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMVMask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::VPERMV3: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::VPERMIV3: {
IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(0);
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
break;
}
return false;
}
default: llvm_unreachable("unknown target shuffle node");
}
// Empty mask indicates the decode failed.
if (Mask.empty())
return false;
// Check if we're getting a shuffle mask with zero'd elements.
if (!AllowSentinelZero)
if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
if (IsFakeUnary)
for (int &M : Mask)
if (M >= (int)Mask.size())
M -= Mask.size();
// If we didn't already add operands in the opcode-specific code, default to
// adding 1 or 2 operands starting at 0.
if (Ops.empty()) {
Ops.push_back(N->getOperand(0));
if (!IsUnary || IsFakeUnary)
Ops.push_back(N->getOperand(1));
}
return true;
}
/// Check a target shuffle mask's inputs to see if we can set any values to
/// SM_SentinelZero - this is for elements that are known to be zero
/// (not just zeroable) from their inputs.
/// Returns true if the target shuffle mask was decoded.
static bool setTargetShuffleZeroElements(SDValue N,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops) {
bool IsUnary;
if (!isTargetShuffle(N.getOpcode()))
return false;
MVT VT = N.getSimpleValueType();
if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
return false;
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
assert((VT.getSizeInBits() % Mask.size()) == 0 &&
"Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
// Extract known constant input data.
APInt UndefSrcElts[2];
SmallVector<APInt, 32> SrcEltBits[2];
bool IsSrcConstant[2] = {
getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
SrcEltBits[0], true, false),
getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
SrcEltBits[1], true, false)};
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
// Already decoded as SM_SentinelZero / SM_SentinelUndef.
if (M < 0)
continue;
// Determine shuffle input and normalize the mask.
unsigned SrcIdx = M / Size;
SDValue V = M < Size ? V1 : V2;
M %= Size;
// We are referencing an UNDEF input.
if (V.isUndef()) {
Mask[i] = SM_SentinelUndef;
continue;
}
// SCALAR_TO_VECTOR - only the first element is defined, and the rest are UNDEF.
// TODO: We currently only set UNDEF for integer types - floats use the same
// registers as vectors and many of the scalar folded loads rely on the
// SCALAR_TO_VECTOR pattern.
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Size % V.getValueType().getVectorNumElements()) == 0) {
int Scale = Size / V.getValueType().getVectorNumElements();
int Idx = M / Scale;
if (Idx != 0 && !VT.isFloatingPoint())
Mask[i] = SM_SentinelUndef;
else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
Mask[i] = SM_SentinelZero;
continue;
}
// Attempt to extract from the source's constant bits.
if (IsSrcConstant[SrcIdx]) {
if (UndefSrcElts[SrcIdx][M])
Mask[i] = SM_SentinelUndef;
else if (SrcEltBits[SrcIdx][M] == 0)
Mask[i] = SM_SentinelZero;
}
}
assert(VT.getVectorNumElements() == Mask.size() &&
"Different mask size from vector size!");
return true;
}
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements than the
// destination value type.
static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
SelectionDAG &DAG) {
Mask.clear();
Ops.clear();
MVT VT = N.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned NumSizeInBits = VT.getSizeInBits();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
"Expected byte aligned value types");
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case ISD::AND:
case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.
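// Illustrative example (not from the original source): for a plain AND with
// a per-byte constant <0xFF,0x00,0xFF,0xFF,...>, each 0xFF byte passes its
// input byte through (mask element i) and each 0x00 byte is known zero, so
// the decoded mask is <0, SM_SentinelZero, 2, 3, ...>.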
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
bool IsAndN = (X86ISD::ANDNP == Opcode);
uint64_t ZeroMask = IsAndN ? 255 : 0;
if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
if (UndefElts[i]) {
Mask.push_back(SM_SentinelUndef);
continue;
}
uint64_t ByteBits = EltBits[i].getZExtValue();
if (ByteBits != 0 && ByteBits != 255)
return false;
Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
}
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
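// Illustrative example: scalar_to_vector(extract_vector_elt(v4i32 V, 2)) of
// type v4i32 decodes to Ops = { V } with mask <2, u, u, u> (no zero padding
// is needed when the scalar and destination element sizes match).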
SDValue N0 = N.getOperand(0);
SDValue SrcExtract;
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
N0.getOperand(0).getValueType() == VT) {
SrcExtract = N0;
} else if (N0.getOpcode() == ISD::AssertZext &&
N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
SrcExtract = N0.getOperand(0);
assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
} else if (N0.getOpcode() == ISD::AssertZext &&
N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
SrcExtract = N0.getOperand(0);
assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
}
if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
return false;
SDValue SrcVec = SrcExtract.getOperand(0);
EVT SrcVT = SrcVec.getValueType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
if (NumSrcElts <= SrcIdx)
return false;
Ops.push_back(SrcVec);
Mask.push_back(SrcIdx);
Mask.append(NumZeros, SM_SentinelZero);
Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
SDValue InVec = N.getOperand(0);
SDValue InScl = N.getOperand(1);
uint64_t InIdx = N.getConstantOperandVal(2);
assert(InIdx < NumElts && "Illegal insertion index");
// Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
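// Illustrative example: PINSRW(V, 0, 2) on v8i16 decodes to Ops = { V } with
// mask <0, 1, SM_SentinelZero, 3, 4, 5, 6, 7>.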
if (X86::isZeroNode(InScl)) {
Ops.push_back(InVec);
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
return true;
}
// Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
// TODO: Expand this to support INSERT_VECTOR_ELT/etc.
unsigned ExOp =
(X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
if (InScl.getOpcode() != ISD::AssertZext ||
InScl.getOperand(0).getOpcode() != ExOp)
return false;
SDValue ExVec = InScl.getOperand(0).getOperand(0);
uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
assert(ExIdx < NumElts && "Illegal extraction index");
Ops.push_back(InVec);
Ops.push_back(ExVec);
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
case X86ISD::PACKSS: {
// If we know input saturation won't happen, we can treat this
// as a truncation shuffle.
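// Illustrative example: a v16i8 PACKSS of two v8i16 inputs whose values are
// already in i8 range simply takes every other byte, i.e. the byte-level
// mask <0, 2, 4, ..., 30> over the concatenated inputs.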
if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
return false;
Ops.push_back(N.getOperand(0));
Ops.push_back(N.getOperand(1));
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i * 2);
return true;
}
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
// Out of range bit shifts are guaranteed to be zero.
if (NumBitsPerElt <= ShiftVal) {
Mask.append(NumElts, SM_SentinelZero);
return true;
}
// We can only decode 'whole byte' bit shifts as shuffles.
if ((ShiftVal % 8) != 0)
break;
uint64_t ByteShift = ShiftVal / 8;
unsigned NumBytes = NumSizeInBits / 8;
unsigned NumBytesPerElt = NumBitsPerElt / 8;
Ops.push_back(N.getOperand(0));
// Clear mask to all zeros and insert the shifted byte indices.
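// Illustrative example: a v2i64 VSHLI by 16 bits (ByteShift == 2,
// NumBytesPerElt == 8) produces the per-byte mask
// <Z,Z,0,1,2,3,4,5, Z,Z,8,9,10,11,12,13>, where Z is SM_SentinelZero.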
Mask.append(NumBytes, SM_SentinelZero);
if (X86ISD::VSHLI == Opcode) {
for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j] = i + j - ByteShift;
} else {
for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j - ByteShift] = i + j;
}
return true;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VZEXT: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (NumSizeInBits != SrcVT.getSizeInBits())
break;
DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
Ops.push_back(Src);
return true;
}
}
return false;
}
/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
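/// Illustrative example: with Inputs = { A, B } and a mask that only
/// references B's range [MaskWidth, 2*MaskWidth), A is dropped and every mask
/// element is rebased by -MaskWidth so the mask points into B alone.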
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
int MaskWidth = Mask.size();
SmallVector<SDValue, 16> UsedInputs;
for (int i = 0, e = Inputs.size(); i < e; ++i) {
int lo = UsedInputs.size() * MaskWidth;
int hi = lo + MaskWidth;
if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
UsedInputs.push_back(Inputs[i]);
continue;
}
for (int &M : Mask)
if (lo <= M)
M -= MaskWidth;
}
Inputs = UsedInputs;
}
/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
/// remaining input indices in case we now have a unary shuffle and adjust the
/// inputs accordingly.
/// Returns true if the target shuffle mask was decoded.
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
SelectionDAG &DAG) {
if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
return true;
}
/// Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
SDValue V = SDValue(N, 0);
EVT VT = V.getValueType();
unsigned Opcode = V.getOpcode();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
: SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
MVT ShufSVT = ShufVT.getVectorElementType();
int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 16> ShuffleOps;
bool IsUnary;
if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt == SM_SentinelZero)
return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
: DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufSVT);
assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
// Actual nodes that may contain scalar elements
if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
return SDValue();
}
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
return SDValue();
}
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
SDValue V;
bool First = true;
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41()) {
for (unsigned i = 0; i < 16; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index, then insert into a zero vector to break any register
// dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v16i8, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
return V;
}
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
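// Illustrative example: bytes (i-1, i) are merged little-endian into one i16
// as (zext(byte i) << 8) | zext(byte i-1), which is then inserted into word
// slot i/2 with PINSRW.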
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
// FIXME: Investigate extending to i32 instead of just i16.
// FIXME: Investigate combining the first 4 bytes as an i32 instead.
SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
LastElt =
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
DAG.getConstant(8, dl, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt) {
if (1 == i) {
V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
: DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
} else {
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i / 2, dl));
}
}
}
}
return DAG.getBitcast(MVT::v16i8, V);
}
/// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
SDValue V;
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index, then insert into a zero vector to break any register
// dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
return V;
}
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
SDValue Elt = Op->getOperand(i);
Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
}
assert(Zeroable.size() - Zeroable.count() > 1 &&
"We expect at least two non-zero elements!");
// We only know how to deal with build_vector nodes where elements are either
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
for (unsigned i=0; i < 4; ++i) {
if (Zeroable[i])
continue;
SDValue Elt = Op->getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
// Make sure that this node is extracting from a 128-bit vector.
MVT VT = Elt.getOperand(0).getSimpleValueType();
if (!VT.is128BitVector())
return SDValue();
if (!FirstNonZero.getNode()) {
FirstNonZero = Elt;
FirstNonZeroIdx = i;
}
}
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
SDValue V1 = FirstNonZero.getOperand(0);
MVT VT = V1.getSimpleValueType();
// See if this build_vector can be lowered as a blend with zero.
SDValue Elt;
unsigned EltMaskIdx, EltIdx;
int Mask[4];
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
// The zero vector will be on the right hand side.
Mask[EltIdx] = EltIdx+4;
continue;
}
Elt = Op->getOperand(EltIdx);
// By construction, Elt is an EXTRACT_VECTOR_ELT with a constant index.
EltMaskIdx = Elt.getConstantOperandVal(1);
if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
break;
Mask[EltIdx] = EltIdx;
}
if (EltIdx == 4) {
// Let the shuffle legalizer deal with blend operations.
SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getBitcast(VT, V1);
return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
}
// See if we can lower this build_vector to an INSERTPS.
if (!Subtarget.hasSSE41())
return SDValue();
SDValue V2 = Elt.getOperand(0);
if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
V1 = SDValue();
bool CanFold = true;
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
if (Zeroable[i])
continue;
SDValue Current = Op->getOperand(i);
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
}
if (!CanFold)
return SDValue();
assert(V1.getNode() && "Expected at least two non-zero elements!");
if (V1.getSimpleValueType() != MVT::v4f32)
V1 = DAG.getBitcast(MVT::v4f32, V1);
if (V2.getSimpleValueType() != MVT::v4f32)
V2 = DAG.getBitcast(MVT::v4f32, V2);
// Ok, we can emit an INSERTPS instruction.
unsigned ZMask = Zeroable.to_ulong();
unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL));
return DAG.getBitcast(VT, Result);
}
/// Return a vector logical shift node.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
SelectionDAG &DAG, const TargetLowering &TLI,
const SDLoc &dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
MVT ShVT = MVT::v16i8;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load, and if the
// address is "base + cst", see if the cst can be "absorbed" into
// the shuffle mask.
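// Illustrative example: a 4-byte load from a 16-byte-aligned stack slot at
// offset 8 can become a v4i32 load of the whole slot followed by the splat
// shuffle <2, 2, 2, 2>.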
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
if (!ISD::isNormalLoad(LD) || LD->isVolatile())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
return SDValue();
int FI = -1;
int64_t Offset = 0;
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
Ptr = Ptr.getOperand(0);
} else {
return SDValue();
}
// FIXME: 256-bit vector instructions don't require a strict alignment,
// improve this code to support it better.
unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI.isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute the exact
// stack offset and reference FI + adjusted offset instead, if someone
// *really* cares about this; that would be the way to implement it.
return SDValue();
} else {
MFI.setObjectAlignment(FI, RequiredAlign);
}
}
// (Offset % 16 or 32) must be a multiple of 4. The address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
if ((Offset % RequiredAlign) & 3)
return SDValue();
int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
}
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset));
SmallVector<int, 8> Mask(NumElems, EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
}
return SDValue();
}
/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
/// elements can be replaced by a single large load which has the same value as
/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
///
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
SmallBitVector LoadMask(NumElems, false);
SmallBitVector ZeroMask(NumElems, false);
SmallBitVector UndefMask(NumElems, false);
// For each element in the initializer, see if we've found a load, zero or an
// undef.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = peekThroughBitcasts(Elts[i]);
if (!Elt.getNode())
return SDValue();
if (Elt.isUndef())
UndefMask[i] = true;
else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
ZeroMask[i] = true;
else if (ISD::isNON_EXTLoad(Elt.getNode())) {
LoadMask[i] = true;
LastLoadedElt = i;
// Each loaded element must be the correct fractional portion of the
// requested vector load.
if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
return SDValue();
} else
return SDValue();
}
assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks");
// Handle Special Cases - all undef or undef/zero.
if (UndefMask.count() == NumElems)
return DAG.getUNDEF(VT);
// FIXME: Should we return this as a BUILD_VECTOR instead?
if ((ZeroMask | UndefMask).count() == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
int FirstLoadedElt = LoadMask.find_first();
SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
EVT LDBaseVT = EltBase.getValueType();
// Consecutive loads can contain UNDEFs but not ZERO elements.
// Consecutive loads with UNDEF and ZERO elements require an
// additional shuffle stage to clear the ZERO elements.
bool IsConsecutiveLoad = true;
bool IsConsecutiveLoadWithZeros = true;
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
if (LoadMask[i]) {
SDValue Elt = peekThroughBitcasts(Elts[i]);
LoadSDNode *LD = cast<LoadSDNode>(Elt);
if (!DAG.areNonVolatileConsecutiveLoads(
LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
i - FirstLoadedElt)) {
IsConsecutiveLoad = false;
IsConsecutiveLoadWithZeros = false;
break;
}
} else if (ZeroMask[i]) {
IsConsecutiveLoad = false;
}
}
auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
"Cannot merge volatile loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
DAG.makeEquivalentMemoryOrdering(LDBase, NewLd);
return NewLd;
};
// LOAD - all consecutive load/undefs (must start/end with a load).
// If we have found an entire vector of loads and undefs, then return a large
// load of the entire vector width starting at the base pointer.
// If the vector contains zeros, then attempt to shuffle those elements.
if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
(IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
assert(LDBase && "Did not find base load for merging consecutive loads");
EVT EltVT = LDBase->getValueType(0);
// Ensure that the input vector size for the merged loads matches the
// cumulative size of the input elements.
if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
return SDValue();
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
// Don't create 256-bit non-temporal aligned loads without AVX2 as these
// will lower to regular temporal loads and use the cache.
if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
VT.is256BitVector() && !Subtarget.hasInt256())
return SDValue();
if (IsConsecutiveLoad)
return CreateLoad(VT, LDBase);
// IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
// vector and a zero vector to clear out the zero elements.
if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
SmallVector<int, 4> ClearMask(NumElems, -1);
for (unsigned i = 0; i < NumElems; ++i) {
if (ZeroMask[i])
ClearMask[i] = i + NumElems;
else if (LoadMask[i])
ClearMask[i] = i;
}
SDValue V = CreateLoad(VT, LDBase);
SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
}
}
int LoadSize =
(1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
(LoadSize == 32 || LoadSize == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
: MVT::getIntegerVT(LoadSize);
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
DAG.makeEquivalentMemoryOrdering(LDBase, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
return SDValue();
}
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
unsigned NumElm = SplatBitSize / ScalarSize;
SmallVector<Constant *, 32> ConstantVec;
for (unsigned i = 0; i < NumElm; i++) {
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
if (ScalarSize == 32) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
} else {
assert(ScalarSize == 64 && "Unsupported floating point scalar size");
Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
}
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
}
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
static bool isUseOfShuffle(SDNode *N) {
for (auto *U : N->uses()) {
if (isTargetShuffle(U->getOpcode()))
return true;
if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
return isUseOfShuffle(U);
}
return false;
}
/// Attempt to use the vbroadcast instruction to generate a splat value
/// from a splat BUILD_VECTOR which uses:
/// a. A single scalar load, or a constant.
/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
///
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
// TODO: Splats could be generated for non-AVX CPUs using SSE
// instructions, but there's less potential gain for only 128-bit vectors.
if (!Subtarget.hasAVX())
return SDValue();
MVT VT = BVOp->getSimpleValueType(0);
SDLoc dl(BVOp);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
BitVector UndefElements;
SDValue Ld = BVOp->getSplatValue(&UndefElements);
// We need a splat of a single value to use broadcast, and it doesn't
// make any sense if the value is only in one element of the vector.
if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
// Check if this is a repeated constant pattern suitable for broadcasting.
if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
SplatBitSize > VT.getScalarSizeInBits() &&
SplatBitSize < VT.getSizeInBits()) {
// Avoid replacing with a broadcast when the build_vector is used by a
// shuffle instruction, to preserve the present custom lowering of shuffles.
if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
return SDValue();
// Replace the BUILD_VECTOR with a broadcast of the repeated constants.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Ctx = DAG.getContext();
MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
if (Subtarget.hasAVX()) {
if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
!(SplatBitSize == 64 && Subtarget.is32Bit())) {
// The splatted value can fit in one INTEGER constant in the constant pool.
// Load the constant and broadcast it.
MVT CVT = MVT::getIntegerVT(SplatBitSize);
Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
MVT::getVectorVT(CVT, Repeat), Ld);
return DAG.getBitcast(VT, Brdcst);
} else if (SplatBitSize == 32 || SplatBitSize == 64) {
// The splatted value can fit in one FLOAT constant in the constant pool.
// Load the constant and broadcast it.
// AVX has support for 32- and 64-bit broadcasts of floats only.
// There is no 64-bit integer broadcast on a 32-bit subtarget.
MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
// Lower the splat via APFloat directly, to avoid any conversion.
Constant *C =
SplatBitSize == 32
? ConstantFP::get(*Ctx,
APFloat(APFloat::IEEEsingle(), SplatValue))
: ConstantFP::get(*Ctx,
APFloat(APFloat::IEEEdouble(), SplatValue));
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
MVT::getVectorVT(CVT, Repeat), Ld);
return DAG.getBitcast(VT, Brdcst);
} else if (SplatBitSize > 64) {
// Load the vector of constants and broadcast it.
MVT CVT = VT.getScalarType();
Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
*Ctx);
SDValue VCP = DAG.getConstantPool(VecC, PVT);
unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
Ld = DAG.getLoad(
MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
return DAG.getBitcast(VT, Brdcst);
}
}
}
return SDValue();
}
bool ConstSplatVal =
(Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
// Make sure that all of the users of a non-constant load are from the
// BUILD_VECTOR node.
if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
unsigned ScalarSize = Ld.getValueSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
// When optimizing for size, generate up to 5 extra bytes for a broadcast
// instruction to save 8 or more bytes of constant pool data.
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
// On Sandybridge (no AVX2), it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
// But override that restriction when optimizing for size.
// TODO: Check if splatting is recommended for other AVX-capable CPUs.
if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
// Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
// For size optimization, also splat v2f64 and v2i64, and for size opt
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
C = CF->getConstantFPValue();
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget.hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The scalar source must be a normal load.
if (!IsLoad)
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(Subtarget.hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit case, so that it
// doesn't match double, since there is no vbroadcastsd xmm.
if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
// Unsupported broadcast.
return SDValue();
}
/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
/// underlying vector and index.
///
/// Modifies \p ExtractedFromVec to the real vector and returns the real
/// index.
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
SDValue ExtIdx) {
int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
return Idx;
// For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
// lowered this:
// (extract_vector_elt (v8f32 %vreg1), Constant<6>)
// to:
// (extract_vector_elt (vector_shuffle<2,u,u,u>
// (extract_subvector (v8f32 %vreg0), Constant<4>),
// undef)
// Constant<0>)
// In this case the vector is the extract_subvector expression and the index
// is 2, as specified by the shuffle.
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
SDValue ShuffleVec = SVOp->getOperand(0);
MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
assert(ShuffleVecVT.getVectorElementType() ==
ExtractedFromVec.getSimpleValueType().getVectorElementType());
int ShuffleIdx = SVOp->getMaskElt(Idx);
if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
ExtractedFromVec = ShuffleVec;
return ShuffleIdx;
}
return Idx;
}
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
unsigned NumElems = Op.getNumOperands();
SDValue VecIn1;
SDValue VecIn2;
SmallVector<unsigned, 4> InsertIndices;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
if (Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
// Quit if more than 1 element needs inserting.
if (InsertIndices.size() > 1)
return SDValue();
InsertIndices.push_back(i);
continue;
}
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
if (!VecIn1.getNode())
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
if (!VecIn2.getNode())
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
if (!VecIn1.getNode())
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
for (unsigned Idx : InsertIndices)
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx, DL));
return NV;
}
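// Bit-pack a constant vXi1 build_vector into a single integer constant, with
// element idx becoming bit idx. Illustrative example: v8i1
// <1,0,1,1,u,0,0,1> packs to the i8 constant 0x8D (undef elements contribute
// a zero bit).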
static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
uint64_t Immediate = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (!In.isUndef())
Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
}
SDLoc dl(Op);
MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
return DAG.getConstant(Immediate, dl, VT);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode()))
return DAG.getTargetConstant(0, dl, VT);
if (ISD::isBuildVectorAllOnes(Op.getNode()))
return DAG.getTargetConstant(1, dl, VT);
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, Imm);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
// Vector has one or more non-const elements
uint64_t Immediate = 0;
SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
bool HasConstElts = false;
int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.isUndef())
continue;
if (!isa<ConstantSDNode>(In))
NonConstIdx.push_back(idx);
else {
Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
HasConstElts = true;
}
if (SplatIdx < 0)
SplatIdx = idx;
else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
// For a splat, use (select i1 splat_elt, all-ones, all-zeroes).
if (IsSplat)
return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
DAG.getConstant(1, dl, VT),
DAG.getConstant(0, dl, VT));
// insert elements one by one
SDValue DstVec;
SDValue Imm;
if (Immediate) {
MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
Imm = DAG.getConstant(Immediate, dl, ImmVT);
}
else if (HasConstElts)
Imm = DAG.getConstant(0, dl, VT);
else
Imm = DAG.getUNDEF(VT);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
DstVec = DAG.getBitcast(VT, Imm);
else {
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
unsigned InsertIdx = NonConstIdx[i];
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(InsertIdx),
DAG.getIntPtrConstant(InsertIdx, dl));
}
return DstVec;
}
/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
///
/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
/// operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
/// arithmetic sub.
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
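/// Illustrative example: for a v4i32 build_vector and Opcode == ISD::ADD,
/// the sequence ((add A[0],A[1]), (add A[2],A[3]), (add B[0],B[1]),
/// (add B[2],B[3])) matches, returning V0 = A and V1 = B (the HADD pattern).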
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
SelectionDAG &DAG,
unsigned BaseIdx, unsigned LastIdx,
SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
unsigned NumElts = LastIdx - BaseIdx;
V0 = DAG.getUNDEF(VT);
V1 = DAG.getUNDEF(VT);
// Check if N implements a horizontal binop.
for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
SDValue Op = N->getOperand(i + BaseIdx);
// Skip UNDEFs.
if (Op->isUndef()) {
// Update the expected vector extract index.
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
ExpectedVExtractIdx += 2;
continue;
}
CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
if (!CanFold)
break;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0) == Op1.getOperand(0) &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
if (i * 2 < NumElts) {
if (V0.isUndef()) {
V0 = Op0.getOperand(0);
if (V0.getValueType() != VT)
return false;
}
} else {
if (V1.isUndef()) {
V1 = Op0.getOperand(0);
if (V1.getValueType() != VT)
return false;
}
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
if (I0 == ExpectedVExtractIdx)
CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
else if (IsCommutable && I1 == ExpectedVExtractIdx) {
// Try to match the following dag sequence:
// (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
} else
CanFold = false;
ExpectedVExtractIdx += 2;
}
return CanFold;
}
/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
/// a concat_vector.
///
/// This is a helper function of LowerToHorizontalOp().
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
/// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
/// the two new horizontal binops.
/// When Mode is set, the first horizontal binop dag node takes as input the
/// lower 128 bits of V0 and the upper 128 bits of V0. The second horizontal
/// binop dag node takes as input the lower 128 bits of V1 and the upper
/// 128 bits of V1.
/// Example:
/// HADD V0_LO, V0_HI
/// HADD V1_LO, V1_HI
///
/// Otherwise, the first horizontal binop dag node takes as input the lower
/// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal
/// binop dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
/// Example:
/// HADD V0_LO, V1_LO
/// HADD V0_HI, V1_HI
///
/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
/// 128 bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
/// the upper 128 bits of the result.
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
const SDLoc &DL, SelectionDAG &DAG,
unsigned X86Opcode, bool Mode,
bool isUndefLO, bool isUndefHI) {
MVT VT = V0.getSimpleValueType();
assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&
"Invalid nodes in input!");
unsigned NumElts = VT.getVectorNumElements();
SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
MVT NewVT = V0_LO.getSimpleValueType();
SDValue LO = DAG.getUNDEF(NewVT);
SDValue HI = DAG.getUNDEF(NewVT);
if (Mode) {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && !V0->isUndef())
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
if (!isUndefHI && !V1->isUndef())
HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
} else {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
/// Returns true iff \p BV builds a vector with the result equivalent to
/// the result of ADDSUB operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
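/// Illustrative example: for v4f32, the build_vector
/// ((fsub A[0],B[0]), (fadd A[1],B[1]), (fsub A[2],B[2]), (fadd A[3],B[3]))
/// matches, with Opnd0 = A and Opnd1 = B.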
static bool isAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1) {
MVT VT = BV->getSimpleValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
// Odd-numbered elements in the input build vector are obtained from
// adding two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting two integer/float elements.
unsigned ExpectedOpcode = ISD::FSUB;
unsigned NextExpectedOpcode = ISD::FADD;
bool AddFound = false;
bool SubFound = false;
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::UNDEF) {
std::swap(ExpectedOpcode, NextExpectedOpcode);
continue;
}
// Early exit if we found an unexpected opcode.
if (Opcode != ExpectedOpcode)
return false;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
// Early exit if we cannot match that sequence.
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
!isa<ConstantSDNode>(Op1.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return false;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
if (I0 != i)
return false;
// We found a valid add/sub node. Update the information accordingly.
if (i & 1)
AddFound = true;
else
SubFound = true;
// Update InVec0 and InVec1.
if (InVec0.isUndef()) {
InVec0 = Op0.getOperand(0);
if (InVec0.getSimpleValueType() != VT)
return false;
}
if (InVec1.isUndef()) {
InVec1 = Op1.getOperand(0);
if (InVec1.getSimpleValueType() != VT)
return false;
}
// Make sure that the operands in input to each add/sub node always
// come from the same pair of vectors.
if (InVec0 != Op0.getOperand(0)) {
if (ExpectedOpcode == ISD::FSUB)
return false;
// FADD is commutable. Try to commute the operands
// and then test again.
std::swap(Op0, Op1);
if (InVec0 != Op0.getOperand(0))
return false;
}
if (InVec1 != Op1.getOperand(0))
return false;
// Update the pair of expected opcodes.
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
// Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
return false;
Opnd0 = InVec0;
Opnd1 = InVec1;
return true;
}
/// Returns true if it is possible to fold MUL and an idiom that has already
/// been recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
/// If (and only if) true is returned, the operands of FMADDSUB are written to
/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
/// %AB = fmul fast <2 x double> %A, %B
/// %Sub = fsub fast <2 x double> %AB, %C
/// %Add = fadd fast <2 x double> %AB, %C
/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
/// <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
/// %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
/// %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
!Subtarget.hasAnyFMA())
return false;
// FIXME: These checks must match the similar ones in
// DAGCombiner::visitFADDForFMACombine. It would be good to have one
// function that would answer if it is Ok to fuse MUL + ADD to FMADD
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
if (!AllowFusion)
return false;
Opnd2 = Opnd1;
Opnd1 = Opnd0.getOperand(1);
Opnd0 = Opnd0.getOperand(0);
return true;
}
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub'
/// operation into an X86ISD::ADDSUB or X86ISD::FMADDSUB node accordingly.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
SDLoc DL(BV);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
// 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
// recognition.
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = BV->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
unsigned Half = NumElts/2;
// Count the number of UNDEF operands in the build_vector in input.
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
for (unsigned i = Half, e = NumElts; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
// Early exit if this is either a build_vector of all UNDEFs, or if all the
// operands but one are UNDEF.
if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
return SDValue();
SDLoc DL(BV);
SDValue InVec0, InVec1;
if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
// Try to match an SSSE3 integer HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
if (!Subtarget.hasAVX())
return SDValue();
if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
// Try to match an AVX horizontal add/sub of packed single/double
// precision floating point values from 256-bit vectors.
SDValue InVec2, InVec3;
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
// Try to match an AVX2 horizontal add/sub of signed integers.
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
// Fold this build_vector into a single horizontal add/sub.
// Do this only if the target has AVX2.
if (Subtarget.hasAVX2())
return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into a pair of horizontal binops followed by
// a concat vector.
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
isUndefLO, isUndefHI);
}
}
if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
VT == MVT::v16i16) && Subtarget.hasAVX()) {
unsigned X86Opcode;
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HSUB;
else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::FHADD;
else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
// Don't try to expand this build_vector into a pair of horizontal add/sub
// if we can simply emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into two horizontal add/subs followed by
// a concat vector.
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
isUndefLO, isUndefHI);
}
return SDValue();
}
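// Worked example for the horizontal matching above (illustrative, v4f32
// with SSE3): the build_vector
//   (fadd a0, a1), (fadd a2, a3), (fadd b0, b1), (fadd b2, b3)
// with a = <a0, a1, a2, a3> and b = <b0, b1, b2, b3> is recognized by
// isHorizontalBinOp over the whole width and lowered to a single
// (X86ISD::FHADD a, b), i.e. one HADDPS instruction.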
/// If a BUILD_VECTOR's source elements all apply the same bit operation and
/// one of their operands is constant, lower to a pair of BUILD_VECTORs and
/// just apply the bit operation to the two vectors.
/// NOTE: It's not in our interest to start making a general purpose
/// vectorizer from this, but enough scalar bit operations are created by the
/// later legalization + scalarization stages to need basic support.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Check that all elements have the same opcode.
// TODO: Should we allow UNDEFS and if so how many?
unsigned Opcode = Op->getOperand(0).getOpcode();
for (unsigned i = 1; i < NumElems; ++i)
if (Opcode != Op->getOperand(i).getOpcode())
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
switch (Opcode) {
default:
return SDValue();
case ISD::AND:
case ISD::XOR:
case ISD::OR:
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
}
SmallVector<SDValue, 4> LHSElts, RHSElts;
for (SDValue Elt : Op->ops()) {
SDValue LHS = Elt.getOperand(0);
SDValue RHS = Elt.getOperand(1);
// We expect the canonicalized RHS operand to be the constant.
if (!isa<ConstantSDNode>(RHS))
return SDValue();
LHSElts.push_back(LHS);
RHSElts.push_back(RHS);
}
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
}
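// Worked example for the bit-op lowering above (illustrative): the v4i32
// build_vector
//   (and a, 1), (and b, 2), (and c, 4), (and d, 8)
// becomes
//   (and (build_vector a, b, c, d), (build_vector 1, 2, 4, 8))
// replacing four scalar ANDs with a single vector AND.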
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
// Vectors containing all zeros can be matched by pxor and xorps.
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, DL);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
(VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
return getOnesVector(VT, DAG, DL);
}
return SDValue();
}
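// For example (illustrative): an all-ones v4i32 build_vector is returned
// as-is here and matches a single PCMPEQD of a register with itself, which
// is cheaper than loading <-1, -1, -1, -1> from the constant pool.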
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Lower predicate (vXi1) build_vectors with dedicated handling.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
return BitOp;
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
uint64_t NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.isUndef())
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
if (X86::isZeroNode(Elt))
NumZero++;
else {
assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
NonZeros |= ((uint64_t)1 << i);
NumNonZero++;
}
}
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NumNonZero == 0)
return DAG.getUNDEF(VT);
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
// If this is an insertion of an i64 value on x86-32, and if the top bits of
// the value are obviously zero, truncate the value to i32 and do the
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
MVT VecVT = MVT::v4i32;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
Item, Idx * 2, true, Subtarget, DAG));
}
}
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget.is64Bit())) {
assert((VT.is128BitVector() || VT.is256BitVector() ||
VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
// We can't directly insert an i8 or i16 into a vector, so zero extend
// it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
if (VT.getSizeInBits() >= 256) {
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
if (Subtarget.hasAVX()) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
} else {
// Without AVX, we need to extend to a 128-bit vector and then
// insert into the 256-bit vector.
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl);
Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl);
}
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getBitcast(VT, Item);
}
}
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
VT, Op.getOperand(1)),
NumBits/2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDValue();
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1) {
if (EVTBits == 32) {
// Instead of a shuffle like this:
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// check if it's possible to issue this instead:
// shuffle (vload ptr), undef, <1, 1, 1, 1>
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
}
return SDValue();
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
return SDValue();
// See if we can use a vector load to get all of the elements.
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
if (SDValue LD =
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
return LD;
}
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build both the lower and upper subvector.
SDValue Lower =
DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElems / 2));
SDValue Upper = DAG.getBuildVector(
HVT, dl, makeArrayRef(&Ops[NumElems / 2], NumElems / 2));
// Recreate the wider vector with the lower and upper part.
if (VT.is256BitVector())
return concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
return concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
DAG, Subtarget))
return V;
// If element VT is == 32 bits and has 4 elements, try to generate an INSERTPS.
if (EVTBits == 32 && NumElems == 4)
if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
return V;
// If element VT is == 32 bits, turn it into a number of shuffles.
if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1ULL << i));
if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector.
break;
case 1:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
break;
case 2:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
case 3:
Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
}
}
bool Reverse1 = (NonZeros & 0x3) == 2;
bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
static_cast<int>(Reverse2 ? NumElems : NumElems+1)
};
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
}
if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build_vector that is mostly a shuffle plus a few insertions.
if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to insert the remaining elements one at a time.
if (Subtarget.hasSSE41()) {
SDValue Result;
if (!Op.getOperand(0).isUndef())
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).isUndef()) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
return Result;
}
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
if (!Op.getOperand(i).isUndef())
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
else
Ops[i] = DAG.getUNDEF(VT);
}
// Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
// : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
// Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
// Generate scaled UNPCKL shuffle mask.
SmallVector<int, 16> Mask;
for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(i);
for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(NumElems+i);
Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
}
return Ops[0];
}
return SDValue();
}
// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
if (ResVT.is256BitVector())
return concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SDValue V3 = Op.getOperand(2);
SDValue V4 = Op.getOperand(3);
return concat256BitVectors(
concat128BitVectors(V1, V2, HalfVT, NumElems / 2, DAG, dl),
concat128BitVectors(V3, V4, HalfVT, NumElems / 2, DAG, dl), ResVT,
NumElems, DAG, dl);
}
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
// Return true if all the operands of the given CONCAT_VECTORS node are zeros
// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
static bool isExpandWithZeros(const SDValue &Op) {
assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
"Expand with zeros only possible in CONCAT_VECTORS nodes!");
for (unsigned i = 1; i < Op.getNumOperands(); i++)
if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
return false;
return true;
}
// Returns the promoted node if the given node is a type promotion (by
// concatenating i1 zeros) of the result of a node that already zeroes all
// upper bits of a k-register, and SDValue() otherwise.
static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
unsigned Opc = Op.getOpcode();
assert(Opc == ISD::CONCAT_VECTORS &&
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected node to check for type promotion!");
// As long as we are concatenating zeros to the upper part of a previous node
// result, climb up the tree until a node with a different opcode is
// encountered.
while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
if (Opc == ISD::INSERT_SUBVECTOR) {
if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
Op.getConstantOperandVal(2) == 0)
Op = Op.getOperand(1);
else
return SDValue();
} else { // Opc == ISD::CONCAT_VECTORS
if (isExpandWithZeros(Op))
Op = Op.getOperand(0);
else
return SDValue();
}
Opc = Op.getOpcode();
}
// Check if the first inserted node zeroes the upper bits, or an 'and' result
// of a node that zeros the upper bits (its masked version).
if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
(Op.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
return Op;
}
return SDValue();
}
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
unsigned NumOfOperands = Op.getNumOperands();
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
// If this node promotes - by concatenating zeroes - the type of the result
// of a node with an instruction that zeroes all upper (irrelevant) bits of
// the output register, mark it as legal and catch the pattern in instruction
// selection to avoid emitting extra instructions (for zeroing upper bits).
if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
ZeroC);
}
SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
// Specialize the cases when all, or all but one, of the operands are undef.
unsigned NumOfDefinedOps = 0;
unsigned OpIdx = 0;
for (unsigned i = 0; i < NumOfOperands; i++)
if (!Op.getOperand(i).isUndef()) {
NumOfDefinedOps++;
OpIdx = i;
}
if (NumOfDefinedOps == 0)
return Undef;
if (NumOfDefinedOps == 1) {
unsigned SubVecNumElts =
Op.getOperand(OpIdx).getValueType().getVectorNumElements();
SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef,
Op.getOperand(OpIdx), IdxVal);
}
MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SmallVector<SDValue, 2> Ops;
for (unsigned i = 0; i < NumOfOperands/2; i++)
Ops.push_back(Op.getOperand(i));
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
Ops.clear();
for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
Ops.push_back(Op.getOperand(i));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
// 2 operands
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
assert(V1.getValueType() == V2.getValueType() &&
V1.getValueType().getVectorNumElements() == NumElems/2 &&
"Unexpected operands in CONCAT_VECTORS");
if (ResVT.getSizeInBits() >= 16)
return Op; // The operation is legal with KUNPCK
bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl);
if (IsZeroV1 && IsZeroV2)
return ZeroVec;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
if (V2.isUndef())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
if (IsZeroV2)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx);
SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
if (V1.isUndef())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
if (IsZeroV1)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
// AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
//===----------------------------------------------------------------------===//
// Vector shuffle lowering
//
// This is an experimental code path for lowering vector shuffles on x86. It is
// designed to handle arbitrary vector shuffles and blends, gracefully
// degrading performance as necessary. It works hard to recognize idiomatic
// shuffles and lower them to optimal instruction patterns without leaving
// a framework that allows reasonably efficient handling of all vector shuffle
// patterns.
//===----------------------------------------------------------------------===//
/// \brief Tiny helper function to identify a no-op mask.
///
/// This is a somewhat boring predicate function. It checks whether the mask
/// array input, which is assumed to be a single-input shuffle mask of the kind
/// used by the X86 shuffle instructions (not a fully general
/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
/// in-place shuffle are 'no-op's.
static bool isNoopShuffleMask(ArrayRef<int> Mask) {
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] >= 0 && Mask[i] != i)
return false;
}
return true;
}
/// \brief Test whether there are elements crossing 128-bit lanes in this
/// shuffle mask.
///
/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
/// and we routinely test for these.
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
int LaneSize = 128 / VT.getScalarSizeInBits();
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
return true;
return false;
}
/// \brief Test whether a shuffle mask is equivalent within each sub-lane.
///
/// This checks a shuffle mask to see if it is performing the same
/// lane-relative shuffle in each sub-lane. This trivially implies
/// that it is also not lane-crossing. It may however involve a blend from the
/// same lane of a second vector.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0);
if (Mask[i] < 0)
continue;
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
: Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] < 0)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
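// Worked example (illustrative, v8f32, 128-bit lanes): the shuffle mask
//   <0, 8, 1, 9, 4, 12, 5, 13>
// performs the same lane-relative shuffle in both lanes, so RepeatedMask
// becomes <0, 4, 1, 5> (second-vector indices remapped into [4, 8)), the
// familiar UNPCKLPS pattern.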
/// Test whether a shuffle mask is equivalent within each 128-bit lane.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
/// Test whether a shuffle mask is equivalent within each 256-bit lane.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, SM_SentinelUndef);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0));
if (Mask[i] == SM_SentinelUndef)
continue;
if (Mask[i] == SM_SentinelZero) {
if (!isUndefOrZero(RepeatedMask[i % LaneSize]))
return false;
RepeatedMask[i % LaneSize] = SM_SentinelZero;
continue;
}
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
int LocalM =
Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] == SM_SentinelUndef)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
/// This is a fast way to test a shuffle mask against a fixed pattern:
///
/// if (isShuffleEquivalent(V1, V2, Mask, {3, 2, 1, 0})) { ... }
///
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask) {
if (Mask.size() != ExpectedMask.size())
return false;
int Size = Mask.size();
// If the values are build vectors, we can look through them to find
// equivalent inputs that make the shuffles equivalent.
auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
for (int i = 0; i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] >= 0 && Mask[i] != ExpectedMask[i]) {
auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
if (!MaskBV || !ExpectedBV ||
MaskBV->getOperand(Mask[i] % Size) !=
ExpectedBV->getOperand(ExpectedMask[i] % Size))
return false;
}
}
return true;
}
/// Checks whether a target shuffle mask is equivalent to an explicit pattern.
///
/// The masks must be exactly the same width.
///
/// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both masks.
static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
for (int i = 0; i < Size; ++i)
if (Mask[i] == SM_SentinelUndef)
continue;
else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero)
return false;
else if (Mask[i] != ExpectedMask[i])
return false;
return true;
}
// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
// mask.
static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
const APInt &Zeroable) {
int NumElts = Mask.size();
assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
}
return TargetMask;
}
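// For example (illustrative): Mask = <0, 5, 2, 7> with Zeroable = 0b1010
// (elements 1 and 3 zeroable) yields the target mask
//   <0, SM_SentinelZero, 2, SM_SentinelZero>.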
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return false;
SmallVector<int, 8> Unpcklwd;
createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true,
/* Unary = */ false);
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
isTargetShuffleEquivalent(Mask, Unpckhwd));
return IsUnpackwdMask;
}
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
/// example.
///
/// NB: We rely heavily on "undef" masks preserving the input lane.
static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
unsigned Imm = 0;
Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4;
Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6;
return Imm;
}
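// Worked example (illustrative): the mask <2, 1, 0, 3> encodes as
//   Imm = 2 | (1 << 2) | (0 << 4) | (3 << 6) = 0xC6,
// the same immediate PSHUFD/SHUFPS would use for that permutation. Undef
// elements default to their own lane index, preserving the input lane.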
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// \brief Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2) {
APInt Zeroable(Mask.size(), 0);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Mask.size();
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
Zeroable.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
M %= Size;
// Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
// If the BUILD_VECTOR has fewer elements, then the bitcasted portion of
// the (larger) source element must be UNDEF/ZERO.
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef() || X86::isZeroNode(Op))
Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
}
continue;
}
// If the BUILD_VECTOR has more elements, then all the (smaller) source
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
bool AllZeroable = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
}
if (AllZeroable)
Zeroable.setBit(i);
continue;
}
}
return Zeroable;
}
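// Worked example for the zeroable computation above (illustrative, v4i32):
// with V2 a build_vector of zeros and Mask = <0, 4, -1, 7>, elements 1 and
// 3 reference the all-zeros V2 and element 2 is undef, so Zeroable = 0b1110
// and only element 0 still needs a real shuffle source.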
// The shuffle result is as follows:
// 0*a[0] 0*a[1] ... 0*a[n], n >= 0, where the a[] elements appear in
// ascending order.
// Each Zeroable element corresponds to a particular Mask element, as
// described in the computeZeroableShuffleElements function.
//
// This function looks for a sub-mask whose nonzero elements are in
// increasing order; if such a sub-mask exists, it returns true.
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
ArrayRef<int> Mask, const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
for (int i = 0, e = Mask.size(); i < e; i++) {
// The mask must be fully defined; bail out on any undef element.
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] < 0)
return false;
if (Zeroable[i])
continue;
// Find the lowest nonzero element.
if (NextElement < 0) {
NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
IsZeroSideLeft = NextElement != 0;
}
// Exit if the mask's nonzero elements are not in increasing order.
if (NextElement != Mask[i])
return false;
NextElement++;
}
return true;
}
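// For example (illustrative, v4i32): with Zeroable = 0b0101, the nonzero
// positions are 1 and 3; a mask whose entries there are 0 and 1 (the low
// elements of V1 in increasing order) matches, with IsZeroSideLeft = false.
// This is exactly the element placement a masked VEXPAND produces.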
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
const int NumBytes = VT.getSizeInBits() / 8;
const int NumEltBytes = VT.getScalarSizeInBits() / 8;
assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
(Subtarget.hasAVX2() && VT.is256BitVector()) ||
(Subtarget.hasBWI() && VT.is512BitVector()));
SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
// Sign bit set in i8 mask means zero element.
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
SDValue V;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / NumEltBytes];
if (M < 0) {
PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
continue;
}
if (Zeroable[i / NumEltBytes]) {
PSHUFBMask[i] = ZeroMask;
continue;
}
// We can only use a single input of V1 or V2.
SDValue SrcV = (M >= Size ? V2 : V1);
if (V && V != SrcV)
return SDValue();
V = SrcV;
M %= Size;
// PSHUFB can't cross lanes, ensure this doesn't happen.
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
return SDValue();
M = M % LaneSize;
M = M * NumEltBytes + (i % NumEltBytes);
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
assert(V && "Failed to find a source input");
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
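// Worked example (illustrative): the single-input v4i32 mask <1, 0, 3, 2>
// has 4-byte elements, so the PSHUFB byte mask becomes
//   <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>
// swapping adjacent dwords without crossing a 128-bit lane.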
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl);
// X86 has a dedicated shuffle pattern that can be lowered to VEXPAND.
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
const APInt &Zeroable,
ArrayRef<int> Mask, SDValue &V1,
SDValue &V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsLeftZeroSide = true;
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
return SDValue();
unsigned VEXPANDMask = (~Zeroable).getZExtValue();
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
unsigned NumElts = VT.getVectorNumElements();
assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements");
SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
Subtarget, DAG, DL);
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
return DAG.getSelect(DL, VT, VMask,
DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
ZeroVector);
}
static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
unsigned &UnpackOpcode, bool IsUnary,
ArrayRef<int> TargetMask, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
for (int i = 0; i != NumElts; i += 2) {
int M1 = TargetMask[i + 0];
int M2 = TargetMask[i + 1];
Undef1 &= (SM_SentinelUndef == M1);
Undef2 &= (SM_SentinelUndef == M2);
Zero1 &= isUndefOrZero(M1);
Zero2 &= isUndefOrZero(M2);
}
assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
"Zeroable shuffle detected");
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
// If a unary shuffle, attempt to match as an unpack lo/hi with zero.
if (IsUnary && (Zero1 || Zero2)) {
// Don't bother if we can blend instead.
if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
return false;
bool MatchLo = true, MatchHi = true;
for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
int M = TargetMask[i];
// Ignore if the input is known to be zero or the index is undef.
if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
(M == SM_SentinelUndef))
continue;
MatchLo &= (M == Unpckl[i]);
MatchHi &= (M == Unpckh[i]);
}
if (MatchLo || MatchHi) {
UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
return true;
}
}
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
}
}
return false;
}
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
SmallVector<int, 8> Unpckh;
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
// Commute and try again.
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
return SDValue();
}
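// For example (illustrative, v4i32): createUnpackShuffleMask produces
// <0, 4, 1, 5> for the lo unpack and <2, 6, 3, 7> for the hi unpack, so a
// matching shuffle becomes a single UNPCKL/UNPCKH, and the commuted masks
// (e.g. <4, 0, 5, 1>) map to the same nodes with the operands swapped.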
/// \brief Try to emit a bitmask instruction for a shuffle.
///
/// This handles cases where we can model a blend exactly as a bitmask due to
/// one of the inputs being zeroable.
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Zeroable[i])
continue;
if (Mask[i] % Size != i)
return SDValue(); // Not a blend.
if (!V)
V = Mask[i] < Size ? V1 : V2;
else if (V != (Mask[i] < Size ? V1 : V2))
return SDValue(); // Can only let one input through the mask.
VMaskOps[i] = AllOnes;
}
if (!V)
return SDValue(); // No non-zeroable elements!
SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
return DAG.getNode(ISD::AND, DL, VT, V, VMask);
}
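// Worked example (illustrative, v4i32 with V2 zeroable in lanes 1 and 3):
// Mask = <0, 5, 2, 7> keeps every element in its own lane, so the shuffle
// lowers to (and V1, <-1, 0, -1, 0>), a single PAND with a constant mask.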
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
/// unavailable. Currently it is only suitable for integer vectors, but could
/// be generalized for floating point vectors if desirable.
static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> MaskOps;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size)
return SDValue(); // Shuffled input!
MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero);
}
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
// We have to cast V2 around.
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
DAG.getBitcast(MaskVT, V1Mask),
DAG.getBitcast(MaskVT, V2)));
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG);
static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
MutableArrayRef<int> TargetMask,
bool &ForceV1Zero, bool &ForceV2Zero,
uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
// TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
int M = TargetMask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i)
continue;
if (M == i + Size) {
BlendMask |= 1ull << i;
continue;
}
if (M == SM_SentinelZero) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
TargetMask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
TargetMask[i] = i + Size;
continue;
}
}
return false;
}
return true;
}
uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, int Scale) {
uint64_t ScaledMask = 0;
for (int i = 0; i != Size; ++i)
if (BlendMask & (1ull << i))
ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
return ScaledMask;
}
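// Worked example (illustrative): a v4i64 blend mask 0b0101, scaled by 2 so
// it can be used with dword-granular VPBLENDD, becomes 0b00110011: each
// set bit expands into Scale consecutive set bits at position i * Scale.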
/// \brief Try to emit a blend instruction for a shuffle.
///
/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
V1 = getZeroVector(VT, Subtarget, DAG, DL);
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
LLVM_FALLTHROUGH;
case MVT::v2i64:
case MVT::v4i32:
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
// that instruction.
if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
LLVM_FALLTHROUGH;
case MVT::v8i16: {
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
case MVT::v32i8: {
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return Masked;
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
// This form of blend is always done on bytes. Compute the byte vector
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
// are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
// mapping a select to operand #1, and 'false' mapping to operand #2. The
// reality in x86 is that vector masks (pre-AVX-512) use only the high bit
// of the element (the remaining are ignored) and 0 in that high bit would
// mean operand #1 while 1 in the high bit would mean operand #2. So while
// the LLVM model for boolean values in vector elements gets the relevant
// bit set, it is set backwards and over-constrained relative to x86's
// actual model.
SmallVector<SDValue, 32> VSELECTMask;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
VSELECTMask.push_back(
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
: DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
MVT::i8));
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT,
DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
V1, V2));
}
case MVT::v16f32:
case MVT::v8f64:
case MVT::v8i64:
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8: {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
default:
llvm_unreachable("Not a supported integer vector type!");
}
}
/// \brief Try to lower as a blend of elements from two inputs followed by
/// a single-input permutation.
///
/// This matches the pattern where we can blend elements from two inputs and
/// then reduce the shuffle to a single-input permutation.
static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
SmallVector<int, 32> PermuteMask(Mask.size(), -1);
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
if (BlendMask[Mask[i] % Size] < 0)
BlendMask[Mask[i] % Size] = Mask[i];
else if (BlendMask[Mask[i] % Size] != Mask[i])
return SDValue(); // Can't blend in the needed input!
PermuteMask[i] = Mask[i] % Size;
}
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
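// Worked example (illustrative, v4i32): Mask = <1, 4, 3, 6> first blends
// with BlendMask = <4, 1, 6, 3> (lanes 0/2 from V2, lanes 1/3 from V1) and
// then applies the single-input PermuteMask = <1, 0, 3, 2> to the result.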
/// \brief Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
MVT VT, SDValue V1,
SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
// blend them together.
SmallVector<int, 32> V1Mask(Mask.size(), -1);
SmallVector<int, 32> V2Mask(Mask.size(), -1);
SmallVector<int, 32> BlendMask(Mask.size(), -1);
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= 0 && Mask[i] < Size) {
V1Mask[i] = Mask[i];
BlendMask[i] = i;
} else if (Mask[i] >= Size) {
V2Mask[i] = Mask[i] - Size;
BlendMask[i] = i + Size;
}
// Try to lower with the simpler initial blend strategy unless one of the
// input shuffles would be a no-op. We prefer to shuffle inputs as the
// shuffle may be able to fold with a load or other benefit. However, when
// we'll have to do 2x as many shuffles in order to achieve this, blending
// first is a better strategy.
if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
if (SDValue BlendPerm =
lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
/// \brief Try to lower a vector shuffle as a rotation.
///
/// This is used to support PALIGNR for SSSE3 and VALIGND/Q for AVX512.
static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]
// [-1, -1, -1, -1, -1, -1, 1, 2]
// [ 3, 4, 5, 6, 7, 8, 9, 10]
// [-1, 4, 5, 6, -1, -1, 9, -1]
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
"Unexpected mask index.");
if (M < 0)
continue;
// Determine where a rotated vector would have started.
int StartIdx = i - (M % NumElts);
if (StartIdx == 0)
// The identity rotation isn't interesting, stop.
return -1;
// If we found the tail of a vector (StartIdx < 0), the rotation is the
// number of missing front elements, -StartIdx. If we found the head of a
// vector, the rotation is how far the head was shifted up, NumElts - StartIdx.
int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
if (Rotation == 0)
Rotation = CandidateRotation;
else if (Rotation != CandidateRotation)
// The rotations don't match, so we can't match this mask.
return -1;
// Compute which value this mask is pointing at.
SDValue MaskV = M < NumElts ? V1 : V2;
// Compute which of the two target values this index should be assigned
// to. This reflects whether the high elements are remaining or the low
// elements are remaining.
SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
// Either set up this value if we've not encountered it before, or check
// that it remains consistent.
if (!TargetV)
TargetV = MaskV;
else if (TargetV != MaskV)
// This may be a rotation, but it pulls from the inputs in some
// unsupported interleaving.
return -1;
}
// Check that we successfully analyzed the mask, and normalize the results.
assert(Rotation != 0 && "Failed to locate a viable rotation!");
assert((Lo || Hi) && "Failed to find a rotated input vector!");
if (!Lo)
Lo = Hi;
else if (!Hi)
Hi = Lo;
V1 = Lo;
V2 = Hi;
return Rotation;
}
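// Worked example (illustrative, v8i16): Mask = <11, 12, 13, 14, 15, 0, 1, 2>
// matches with Rotation = 3: the first five elements are the tail of V2
// (StartIdx = -3, so Hi = V2) and the last three are the head of V1
// (StartIdx = 5, so Lo = V1).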
/// \brief Try to lower a vector shuffle as a byte rotation.
///
/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
/// try to generically lower a vector shuffle through such a pattern. It
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
///
/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
// Don't accept any shuffles with zero elements.
if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return -1;
// PALIGNR works on 128-bit lanes.
SmallVector<int, 16> RepeatedMask;
if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
return -1;
int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
if (Rotation <= 0)
return -1;
// PALIGNR rotates bytes, so we need to scale the
// rotation based on how many bytes are in the vector lane.
int NumElts = RepeatedMask.size();
int Scale = 16 / NumElts;
return Rotation * Scale;
}
static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
SDValue Lo = V1, Hi = V2;
int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
if (ByteRotation <= 0)
return SDValue();
// Cast the inputs to an i8 vector of the correct length to match PALIGNR
// or PSLLDQ/PSRLDQ.
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
Lo = DAG.getBitcast(ByteVT, Lo);
Hi = DAG.getBitcast(ByteVT, Hi);
// SSSE3 targets can use the palignr instruction.
if (Subtarget.hasSSSE3()) {
assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
DAG.getConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
assert(ByteVT == MVT::v16i8 &&
"SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
int LoByteShift = 16 - ByteRotation;
int HiByteShift = ByteRotation;
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
/// \brief Try to lower a vector shuffle as a dword/qword rotation.
///
/// AVX512 has VALIGND/VALIGNQ instructions that will do an arbitrary
/// rotation of the concatenation of two vectors; this routine will
/// try to generically lower a vector shuffle through such a pattern.
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
"Only 32-bit and 64-bit elements are supported!");
// 128/256-bit vectors are only supported with VLX.
assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
&& "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
if (Rotation <= 0)
return SDValue();
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
DAG.getConstant(Rotation, DL, MVT::i8));
}
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
/// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
/// matches elements from one of the input vectors shuffled to the left or
/// right with zeroable elements 'shifted in'. It handles both the strictly
/// bit-wise element shifts and the byte shift across an entire 128-bit double
/// quad word lane.
///
/// PSLL : (little-endian) left bit shift.
/// [ zz, 0, zz, 2 ]
/// [ -1, 4, zz, -1 ]
/// PSRL : (little-endian) right bit shift.
/// [ 1, zz, 3, zz]
/// [ -1, -1, 7, zz]
/// PSLLDQ : (little-endian) left byte shift
/// [ zz, 0, 1, 2, 3, 4, 5, 6]
/// [ zz, zz, -1, -1, 2, 3, 4, -1]
/// [ zz, zz, zz, zz, zz, zz, -1, 1]
/// PSRLDQ : (little-endian) right byte shift
/// [ 5, 6, 7, zz, zz, zz, zz, zz]
/// [ -1, 5, 6, 7, zz, zz, zz, zz]
/// [ 1, 2, -1, -1, -1, -1, zz, zz]
static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask, int MaskOffset,
const APInt &Zeroable,
const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
auto CheckZeros = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i < Size; i += Scale)
for (int j = 0; j < Shift; ++j)
if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
return false;
return true;
};
auto MatchShift = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i != Size; i += Scale) {
unsigned Pos = Left ? i + Shift : i;
unsigned Low = Left ? i : i + Shift;
unsigned Len = Scale - Shift;
if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
return -1;
}
int ShiftEltBits = ScalarSizeInBits * Scale;
bool ByteShift = ShiftEltBits > 64;
Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
: (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
// Normalize the scale for byte shifts to still produce an i64 element
// type.
Scale = ByteShift ? Scale / 2 : Scale;
// We need to round trip through the appropriate type for the shift.
MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
: MVT::getVectorVT(ShiftSVT, Size / Scale);
return (int)ShiftAmt;
};
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
// keep doubling the size of the integer elements up to that. We can
// then shift the elements of the integer vector by whole multiples of
// their width within the elements of the larger integer vector. Test each
// multiple to see if we can find a match with the moved element indices
// and that the shifted in elements are all zeroable.
unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
for (int Shift = 1; Shift != Scale; ++Shift)
for (bool Left : {true, false})
if (CheckZeros(Shift, Scale, Left)) {
int ShiftAmt = MatchShift(Shift, Scale, Left);
if (0 < ShiftAmt)
return ShiftAmt;
}
// No match.
return -1;
}
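// Worked example (illustrative): the v4i32 mask [zz, 0, zz, 2] matches with
// Scale == 2 and Shift == 1 (Left): positions 0 and 2 are zeroable and
// positions 1 and 3 hold the sequential indices 0 and 2. ShiftEltBits ==
// 64, so this is a bit shift rather than a byte shift: Opcode ==
// X86ISD::VSHLI, ShiftVT == v2i64 and ShiftAmt == 32, i.e. a PSLLQ by 32
// of the vector reinterpreted as two 64-bit elements.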
static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
MVT ShiftVT;
SDValue V = V1;
unsigned Opcode;
// Try to match shuffle against V1 shift.
int ShiftAmt = matchVectorShuffleAsShift(
ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget);
// If V1 failed, try to match shuffle against V2 shift.
if (ShiftAmt < 0) {
ShiftAmt =
matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
Mask, Size, Zeroable, Subtarget);
V = V2;
}
if (ShiftAmt < 0)
return SDValue();
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(Opcode, DL, ShiftVT, V,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
return DAG.getBitcast(VT, V);
}
// EXTRQ: Extract Len elements from lower half of source, starting at Idx.
// Remainder of lower half result is zero and upper half is all undef.
static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx, const APInt &Zeroable) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
return false;
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
assert(Len > 0 && "Zeroable shuffle mask");
// Attempt to match first Len sequential elements from the lower half.
SDValue Src;
int Idx = -1;
for (int i = 0; i != Len; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
// The extracted elements must start at a valid index and all mask
// elements must be in the lower half.
if (i > M || M >= HalfSize)
return false;
if (Idx < 0 || (Src == V && Idx == (M - i))) {
Src = V;
Idx = M - i;
continue;
}
return false;
}
if (!Src || Idx < 0)
return false;
assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Src;
return true;
}
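// Worked example (illustrative): for a v8i16 shuffle whose mask is
// [1, 2, 3, zz, u, u, u, u] (upper half undef, element 3 zeroable), Len ==
// 3 and Idx == 1, so BitLen == 3 * 16 == 48 and BitIdx == 1 * 16 == 16:
// EXTRQ extracts 48 bits starting at bit 16 of the source.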
// INSERTQ: Extract lowest Len elements from lower half of second source and
// insert over first source, starting at Idx.
// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
return false;
for (int Idx = 0; Idx != HalfSize; ++Idx) {
SDValue Base;
// Attempt to match first source from mask before insertion point.
if (isUndefInRange(Mask, 0, Idx)) {
/* EMPTY */
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
Base = V1;
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
Base = V2;
} else {
continue;
}
// Extend the extraction length looking to match both the insertion of
// the second source and the remaining elements of the first.
for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
SDValue Insert;
int Len = Hi - Idx;
// Match insertion.
if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
Insert = V1;
} else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
Insert = V2;
} else {
continue;
}
// Match the remaining elements of the lower half.
if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
/* EMPTY */
} else if ((!Base || (Base == V1)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
Base = V1;
} else if ((!Base || (Base == V2)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
Size + Hi)) {
Base = V2;
} else {
continue;
}
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Base;
V2 = Insert;
return true;
}
}
return false;
}
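// Worked example (illustrative): the v8i16 mask [0, 8, 9, 3, u, u, u, u]
// matches with Base = V1 (elements 0 and 3 in place), Insert = V2, Idx == 1
// and Len == 2, so BitLen == 32 and BitIdx == 16: INSERTQ inserts the low
// 32 bits of V2 at bit offset 16 of V1.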
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and
/// can start from an offset element index in the input; to avoid excess
/// shuffling, the offset must either be in the bottom lane or at the
/// start of a higher lane. All extended elements must be from
/// the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = 128 / EltBits;
int OffsetLane = Offset / NumEltsPerLane;
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
assert(0 <= Offset && "Extension offset must be positive.");
assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
"Extension offset must be in the first lane or start an upper lane.");
// Check that an index is in the same lane as the base offset.
auto SafeOffset = [&](int Idx) {
return OffsetLane == (Idx / NumEltsPerLane);
};
// Shift along an input so that the offset base moves to the first element.
auto ShuffleOffset = [&](SDValue V) {
if (!Offset)
return V;
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = 0; i * Scale < NumElements; ++i) {
int SrcIdx = i + Offset;
ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
}
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
};
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2; a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
// For any-extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
-1};
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {Offset / 2, -1,
SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFWMask[4] = {1, -1, -1, -1};
unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
return DAG.getBitcast(
VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
}
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
assert(VT.is128BitVector() && "Unexpected vector width!");
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(LoIdx, DL, MVT::i8)));
if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
!SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
int HiIdx = (Offset + 1) * EltBits;
SDValue Hi = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(HiIdx, DL, MVT::i8)));
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements, which also makes it easier to use pshufb.
if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i) {
int Idx = Offset + (i / Scale);
PSHUFBMask[i] = DAG.getConstant(
(i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8);
}
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask)));
}
// If we are extending from an offset, ensure we start on a boundary that
// we can unpack from.
int AlignToUnpack = Offset % (NumElements / Scale);
if (AlignToUnpack) {
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = AlignToUnpack; i < NumElements; ++i)
ShMask[i - AlignToUnpack] = i;
InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
Offset -= AlignToUnpack;
}
// Otherwise emit a sequence of unpacks.
do {
unsigned UnpackLoHi = X86ISD::UNPCKL;
if (Offset >= (NumElements / 2)) {
UnpackLoHi = X86ISD::UNPCKH;
Offset -= (NumElements / 2);
}
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
InputV = DAG.getBitcast(InputVT, InputV);
InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
} while (Scale > 1);
return DAG.getBitcast(VT, InputV);
}
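// Worked example of the final unpack loop (illustrative): zero-extending
// v16i8 to v4i32 on plain SSE2 (Scale == 4, Offset == 0) emits PUNPCKLBW
// with a zero vector (bytes become zero-extended words, Scale becomes 2),
// then PUNPCKLWD with a zero vector (words become zero-extended dwords,
// Scale becomes 1), and finally bitcasts back to VT.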
/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
///
/// This routine will try to do everything in its power to cleverly lower
/// a shuffle which happens to match the pattern of a zero extend. It doesn't
/// check for the profitability of this lowering; it tries to aggressively
/// match this pattern. It will use all of the micro-architectural details it
/// can to emit an efficient lowering. It handles both blends with all-zero
/// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
/// masking out later).
///
/// The reason we have dedicated lowering for zext-style shuffles is that they
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = NumElements / NumLanes;
assert(VT.getScalarSizeInBits() <= 32 &&
"Exceeds 32-bit integer zero extension limit");
assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
// Define a helper function to check a particular ext-scale and lower to it if
// valid.
auto Lower = [&](int Scale) -> SDValue {
SDValue InputV;
bool AnyExt = true;
int Offset = 0;
int Matches = 0;
for (int i = 0; i < NumElements; ++i) {
int M = Mask[i];
if (M < 0)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
// Each of the extended elements needs to be zeroable.
if (!Zeroable[i])
return SDValue();
// We no longer are in the anyext case.
AnyExt = false;
continue;
}
// The base elements need to be consecutive indices into the same input
// vector.
SDValue V = M < NumElements ? V1 : V2;
M = M % NumElements;
if (!InputV) {
InputV = V;
Offset = M - (i / Scale);
} else if (InputV != V)
return SDValue(); // Flip-flopping inputs.
// Offset must start in the lowest 128-bit lane or at the start of an
// upper lane.
// FIXME: Is it ever worth allowing a negative base offset?
if (!((0 <= Offset && Offset < NumEltsPerLane) ||
(Offset % NumEltsPerLane) == 0))
return SDValue();
// If we are offsetting, all referenced entries must come from the same
// lane.
if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
return SDValue();
if ((M % NumElements) != (Offset + (i / Scale)))
return SDValue(); // Non-consecutive strided elements.
Matches++;
}
// If we fail to find an input, we have a zero-shuffle which should always
// have already been handled.
// FIXME: Maybe handle this here in case during blending we end up with one?
if (!InputV)
return SDValue();
// If we are offsetting, don't extend if we only match a single input; we
// can always do better by using a basic PSHUF or PUNPCK.
if (Offset != 0 && Matches < 2)
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
assert(Bits % 64 == 0 &&
"The number of bits in a vector must be divisible by 64 on x86!");
int NumExtElements = Bits / 64;
// Each iteration, try extending the elements half as much, but into twice as
// many elements.
for (; NumExtElements < NumElements; NumExtElements *= 2) {
assert(NumElements % NumExtElements == 0 &&
"The input vector size must be divisible by the extended size.");
if (SDValue V = Lower(NumElements / NumExtElements))
return V;
}
// General extends failed, but 128-bit vectors may be able to use MOVQ.
if (Bits != 128)
return SDValue();
// Returns one of the source operands if the shuffle can be reduced to a
// MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.
auto CanZExtLowHalf = [&]() {
for (int i = NumElements / 2; i != NumElements; ++i)
if (!Zeroable[i])
return SDValue();
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
return V1;
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
return V2;
return SDValue();
};
if (SDValue V = CanZExtLowHalf()) {
V = DAG.getBitcast(MVT::v2i64, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
return DAG.getBitcast(VT, V);
}
// No viable ext lowering found.
return SDValue();
}
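// Worked example (illustrative): for a v4i32 shuffle with mask
// [0, zz, 1, zz], the first attempted scale is Scale == 2: element 0 picks
// InputV = V1 with Offset == 0, element 2 continues the stride
// (1 == Offset + 2 / 2), and the odd elements are zeroable, so AnyExt is
// false and on SSE4.1 this becomes a PMOVZXDQ-style VZEXT from v4i32 to
// v2i64.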
/// \brief Try to get a scalar value for a specific element of a vector.
///
/// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
SelectionDAG &DAG) {
MVT VT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
V = peekThroughBitcasts(V);
// If the bitcasts shift the element size, we can't extract an equivalent
// element from it.
MVT NewVT = V.getSimpleValueType();
if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) {
// Ensure the scalar operand is the same size as the destination.
// FIXME: Add support for scalar truncation where possible.
SDValue S = V.getOperand(Idx);
if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits())
return DAG.getBitcast(EltVT, S);
}
return SDValue();
}
/// \brief Helper to test for a load that can be folded with x86 shuffles.
///
/// This is particularly important because the set of instructions varies
/// significantly based on whether the operand is a load or not.
static bool isShuffleFoldableLoad(SDValue V) {
V = peekThroughBitcasts(V);
return ISD::isNON_EXTLoad(V.getNode());
}
/// \brief Try to lower insertion of a single element into a zero vector.
///
/// This is a common pattern that we have especially efficient ways to lower
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
int V2Index =
find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
IsV1Zeroable = false;
break;
}
// Check for a single input from a SCALAR_TO_VECTOR node.
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
DAG);
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
if (!IsV1Zeroable)
return SDValue();
// Zero-extend directly to i32.
ExtVT = MVT::v4i32;
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
EltVT == MVT::i16) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
}
if (!IsV1Zeroable) {
// If V1 can't be treated as a zero vector we have fewer options to lower
// this. We can't support integer vectors or non-zero targets cheaply, and
// the V1 elements can't be permuted in any way.
assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
if (!VT.isFloatingPoint() || V2Index != 0)
return SDValue();
SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
V1Mask[V2Index] = -1;
if (!isNoopShuffleMask(V1Mask))
return SDValue();
// This is essentially a special case blend operation, but if we have
// general purpose blend operations, they are always faster. Bail and let
// the rest of the lowering handle these as blends.
if (Subtarget.hasSSE41())
return SDValue();
// Otherwise, use MOVSD or MOVSS.
assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
"Only two types of floating point element types to handle!");
return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
ExtVT, V1, V2);
}
// This lowering only works for the low element with floating point vectors.
if (VT.isFloatingPoint() && V2Index != 0)
return SDValue();
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getBitcast(VT, V2);
if (V2Index != 0) {
// If we have 4 or fewer lanes we can cheaply shuffle the element into
// the desired position. Otherwise it is more efficient to do a vector
// shift left. We know that we can do a vector shift left because all
// the inputs are zero.
if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
DAG.getDataLayout(), VT)));
V2 = DAG.getBitcast(VT, V2);
}
}
return V2;
}
/// Try to lower broadcast of a single - truncated - integer element,
/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
///
/// This assumes we have AVX2.
static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
SDValue V0, int BroadcastIdx,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
EVT EltVT = VT.getVectorElementType();
EVT V0VT = V0.getValueType();
assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
EVT V0EltVT = V0VT.getVectorElementType();
if (!V0EltVT.isInteger())
return SDValue();
const unsigned EltSize = EltVT.getSizeInBits();
const unsigned V0EltSize = V0EltVT.getSizeInBits();
// This is only a truncation if the original element type is larger.
if (V0EltSize <= EltSize)
return SDValue();
assert(((V0EltSize % EltSize) == 0) &&
"Scalar type sizes must all be powers of 2 on x86!");
const unsigned V0Opc = V0.getOpcode();
const unsigned Scale = V0EltSize / EltSize;
const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
V0Opc != ISD::BUILD_VECTOR)
return SDValue();
SDValue Scalar = V0.getOperand(V0BroadcastIdx);
// If we're extracting non-least-significant bits, shift so we can truncate.
// Hopefully, we can fold away the trunc/srl/load into the broadcast.
// Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
// vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
if (const int OffsetIdx = BroadcastIdx % Scale)
Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType()));
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
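// Worked example (illustrative): splatting byte 5 of a v4i32 build_vector
// as v16i8 gives Scale == 32 / 8 == 4, V0BroadcastIdx == 5 / 4 == 1 and
// OffsetIdx == 5 % 4 == 1, so operand 1 of the build_vector is shifted
// right by 8 bits, truncated to i8 and fed to VBROADCAST.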
/// \brief Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
(Subtarget.hasAVX2() && VT.isInteger())))
return SDValue();
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
// we can only broadcast from a register with AVX2.
unsigned NumElts = Mask.size();
unsigned Opcode = VT == MVT::v2f64 ? X86ISD::MOVDDUP : X86ISD::VBROADCAST;
bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();
// Check that the mask is a broadcast.
int BroadcastIdx = -1;
for (int i = 0; i != (int)NumElts; ++i) {
SmallVector<int, 8> BroadcastMask(NumElts, i);
if (isShuffleEquivalent(V1, V2, Mask, BroadcastMask)) {
BroadcastIdx = i;
break;
}
}
if (BroadcastIdx < 0)
return SDValue();
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
"a sorted mask where the broadcast "
"comes from V1.");
// Go up the chain of (vector) values to find a scalar load that we can
// combine with the broadcast.
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
case ISD::BITCAST: {
SDValue VSrc = V.getOperand(0);
MVT SrcVT = VSrc.getSimpleValueType();
if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
break;
V = VSrc;
continue;
}
case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
}
case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (!ConstantIdx)
break;
int BeginIdx = (int)ConstantIdx->getZExtValue();
int EndIdx =
BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
BroadcastIdx -= BeginIdx;
V = VInner;
} else {
V = VOuter;
}
continue;
}
}
break;
}
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
// First, look through bitcast: if the original value has a larger element
// type than the shuffle, the broadcast element is in essence truncated.
// Make that explicit to ease folding.
if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
MVT BroadcastVT = VT;
// Peek through any bitcast (only useful for loads).
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
} else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
Opcode = (BroadcastVT.is128BitVector() ? X86ISD::MOVDDUP : Opcode);
}
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
LoadSDNode *Ld = cast<LoadSDNode>(BC);
SDValue BaseAddr = Ld->getOperand(1);
EVT SVT = BroadcastVT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, V);
} else if (!BroadcastFromReg) {
// We can't broadcast from a vector register.
return SDValue();
} else if (BroadcastIdx != 0) {
// We can only broadcast from the zero-element of a vector register,
// but it can be advantageous to broadcast from the zero-element of a
// subvector.
if (!VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
// VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
// Only broadcast the zero-element of a 128-bit subvector.
unsigned EltSize = VT.getScalarSizeInBits();
if (((BroadcastIdx * EltSize) % 128) != 0)
return SDValue();
// The shuffle input might have been a bitcast we looked through; look at
// the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
// later bitcast it to BroadcastVT.
MVT SrcVT = V.getSimpleValueType();
assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
"Unexpected vector element size");
assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) &&
"Unexpected vector size");
MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
DAG.getIntPtrConstant(BroadcastIdx, DL));
}
if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
DAG.getBitcast(MVT::f64, V));
// Bitcast back to the same scalar type as BroadcastVT.
MVT SrcVT = V.getSimpleValueType();
if (SrcVT.getScalarType() != BroadcastVT.getScalarType()) {
assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
"Unexpected vector element size");
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
SrcVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
} else {
SrcVT = BroadcastVT.getScalarType();
}
V = DAG.getBitcast(SrcVT, V);
}
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
V = DAG.getBitcast(MVT::f64, V);
unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
}
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits.
if (SrcVT.getSizeInBits() > 128)
V = extract128BitVector(V, 0, DAG, DL);
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
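// Illustrative example of the load-narrowing path above: if V1 is a
// non-volatile, single-use v4f32 load and the mask splats element 2, the
// vector load is replaced by a scalar load at BaseAddr plus 8 bytes and
// that scalar is broadcast, so the full-width load disappears.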
// Check for whether we can use INSERTPS to perform the shuffle. We only use
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can just always use two SHUFPS instructions which
// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
// perform INSERTPS if a single V1 element is out of place and all V2
// elements are zeroable.
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
const APInt &Zeroable,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Attempt to match INSERTPS with one element from VA or VB being
// inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
// are updated.
auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
ArrayRef<int> CandidateMask) {
unsigned ZMask = 0;
int VADstIndex = -1;
int VBDstIndex = -1;
bool VAUsedInPlace = false;
for (int i = 0; i < 4; ++i) {
// Synthesize a zero mask from the zeroable elements (includes undefs).
if (Zeroable[i]) {
ZMask |= 1 << i;
continue;
}
// Flag if we use any VA inputs in place.
if (i == CandidateMask[i]) {
VAUsedInPlace = true;
continue;
}
// We can only insert a single non-zeroable element.
if (VADstIndex >= 0 || VBDstIndex >= 0)
return false;
if (CandidateMask[i] < 4) {
// VA input out of place for insertion.
VADstIndex = i;
} else {
// VB input for insertion.
VBDstIndex = i;
}
}
// Don't bother if we have no (non-zeroable) element for insertion.
if (VADstIndex < 0 && VBDstIndex < 0)
return false;
// Determine element insertion src/dst indices. The src index is from the
// start of the inserted vector, not the start of the concatenated vector.
unsigned VBSrcIndex = 0;
if (VADstIndex >= 0) {
// If we have a VA input out of place, we use VA as the V2 element
// insertion and don't use the original V2 at all.
VBSrcIndex = CandidateMask[VADstIndex];
VBDstIndex = VADstIndex;
VB = VA;
} else {
VBSrcIndex = CandidateMask[VBDstIndex] - 4;
}
// If no V1 inputs are used in place, then the result is created only from
// the zero mask and the V2 insertion - so remove V1 dependency.
if (!VAUsedInPlace)
VA = DAG.getUNDEF(MVT::v4f32);
// Update V1, V2 and InsertPSMask accordingly.
V1 = VA;
V2 = VB;
// Insert the V2 element into the desired position.
InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
return true;
};
if (matchAsInsertPS(V1, V2, Mask))
return true;
// Commute and try again.
SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
ShuffleVectorSDNode::commuteMask(CommutedMask);
if (matchAsInsertPS(V2, V1, CommutedMask))
return true;
return false;
}
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
// Attempt to match the insertps pattern.
unsigned InsertPSMask;
if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
return SDValue();
// Insert the V2 element into the desired position.
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
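// Worked example (illustrative): for the v4f32 mask [0, 1, 5, zz] (element
// 3 zeroable), elements 0 and 1 of V1 stay in place, element 1 of V2 (mask
// index 5) is inserted into lane 2 and lane 3 is zeroed, giving
// InsertPSMask == (1 << 6) | (2 << 4) | 0b1000 == 0x68.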
/// \brief Try to lower a shuffle as a permute of the inputs followed by an
/// UNPCK instruction.
///
/// This specifically targets cases where we end up with alternating between
/// the two inputs, and so can permute them into something that feeds a single
/// UNPCK instruction. Note that this routine only targets integer vectors
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
"This routine only works on 128-bit vectors.");
assert(!V2.isUndef() &&
"This routine should only be used when blending two inputs.");
assert(Mask.size() >= 2 && "Single element masks are invalid.");
int Size = Mask.size();
int NumLoInputs =
count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
int NumHiInputs =
count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });
bool UnpackLo = NumLoInputs >= NumHiInputs;
auto TryUnpack = [&](int ScalarSize, int Scale) {
SmallVector<int, 16> V1Mask((unsigned)Size, -1);
SmallVector<int, 16> V2Mask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
// Each element of the unpack contains Scale elements from this mask.
int UnpackIdx = i / Scale;
// We only handle the case where V1 feeds the first slots of the unpack.
// We rely on canonicalization to ensure this is the case.
if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
return SDValue();
// Setup the mask for this input. The indexing is tricky as we have to
// handle the unpack stride.
SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
Mask[i] % Size;
}
// If we will have to shuffle both inputs to use the unpack, check whether
// we can just unpack first and shuffle the result. If so, skip this unpack.
if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
!isNoopShuffleMask(V2Mask))
return SDValue();
// Shuffle the inputs into place.
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
// Cast the inputs to the type we will use to unpack them.
MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
V1 = DAG.getBitcast(UnpackVT, V1);
V2 = DAG.getBitcast(UnpackVT, V2);
// Unpack the inputs and cast the result back to the desired type.
return DAG.getBitcast(
VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
UnpackVT, V1, V2));
};
// We try each unpack from the largest to the smallest to try and find one
// that fits this mask.
int OrigScalarSize = VT.getScalarSizeInBits();
for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
assert((NumLoInputs > 0 || NumHiInputs > 0) &&
"We have to have *some* inputs!");
int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
// FIXME: We could consider the total complexity of the permute of each
// possible unpacking. Or at the least we should consider how many
// half-crossings are created.
// FIXME: We could consider commuting the unpacks.
SmallVector<int, 32> PermMask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
PermMask[i] =
2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
}
return DAG.getVectorShuffle(
VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
DL, VT, V1, V2),
DAG.getUNDEF(VT), PermMask);
}
return SDValue();
}
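// Worked example (illustrative): the v8i16 mask [0, 8, 2, 10, 4, 12, 6, 14]
// alternates between the two inputs, so TryUnpack with Scale == 1 permutes
// both V1 and V2 by [0, 2, 4, 6, -1, -1, -1, -1] and then emits a single
// PUNPCKLWD, which interleaves the permuted vectors into the requested
// order.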
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
/// support for floating point shuffles but not integer shuffles. These
/// instructions will incur a domain crossing penalty on some chips though so
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction.
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
return DAG.getNode(
X86ISD::SHUFP, DL, MVT::v2f64,
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
}
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
isShuffleEquivalent(V1, V2, Mask, {1, 3}))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
return DAG.getNode(
isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD,
DL, MVT::v2f64, V2,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
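// Illustrative example of the final SHUFPD immediate: if none of the
// earlier patterns fire (e.g. plain SSE2 with generic inputs), the mask
// [0, 3] yields SHUFPDMask == (0 == 1) | (((3 - 2) == 1) << 1) == 2,
// selecting element 0 of V1 and element 1 of V2.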
/// \brief Handle lowering of 2-lane 64-bit integer shuffles.
///
/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
/// the integer unit to minimize domain crossing penalties. However, for blends
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {
std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
return DAG.getBitcast(
MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
}
assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
// If we have a blend of two same-type PACKUS operations and the blend aligns
// with the low and high halves, we can just merge the PACKUS operations.
// This is particularly important as it lets us merge shuffles that this
// routine itself creates.
auto GetPackNode = [](SDValue V) {
V = peekThroughBitcasts(V);
return V.getOpcode() == X86ISD::PACKUS ? V : SDValue();
};
if (SDValue V1Pack = GetPackNode(V1))
if (SDValue V2Pack = GetPackNode(V2)) {
EVT PackVT = V1Pack.getValueType();
if (PackVT == V2Pack.getValueType())
return DAG.getBitcast(MVT::v2i64,
DAG.getNode(X86ISD::PACKUS, DL, PackVT,
Mask[0] == 0 ? V1Pack.getOperand(0)
: V1Pack.getOperand(1),
Mask[1] == 2 ? V2Pack.getOperand(0)
: V2Pack.getOperand(1)));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
Mask, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.
V1 = DAG.getBitcast(MVT::v2f64, V1);
V2 = DAG.getBitcast(MVT::v2f64, V2);
return DAG.getBitcast(MVT::v2i64,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
/// \brief Test whether this can be lowered with a single SHUFPS instruction.
///
/// This is used to disable more specialized lowerings when the shufps lowering
/// will happen to be efficient.
static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
// This routine only handles 128-bit shufps.
assert(Mask.size() == 4 && "Unsupported mask size!");
assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
// To lower with a single SHUFPS we need to have the low half and high half
// each requiring a single input.
if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
return false;
if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
return false;
return true;
}
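// Illustrative examples: [0, 1, 4, 5] is a single SHUFPS (the low half
// reads only V1 and the high half only V2), while [0, 4, 1, 5] is not,
// because its low half needs elements from both inputs.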
/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
if (Mask[V2AdjIndex] < 0) {
// Handles all the cases where we have a single V2 element and an undef.
// This will only ever happen in the high lanes because we commute the
// vector otherwise.
if (V2Index < 2)
std::swap(LowV, HighV);
NewMask[V2Index] -= 4;
} else {
// Handle the case where the V2 element ends up adjacent to a V1 element.
// To make this work, blend them together as the first step.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
// high or low half formed.
if (V2Index < 2) {
LowV = V2;
HighV = V1;
} else {
HighV = V2;
}
NewMask[V1Index] = 2; // We put the V1 element in V2[2].
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
}
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
// high lanes.
NewMask[2] -= 4;
NewMask[3] -= 4;
} else if (Mask[2] < 4 && Mask[3] < 4) {
// We also handle the reversed case because this utility may get called
// when we detect a SHUFPS pattern but can't easily commute the shuffle to
// arrange things in the right direction.
NewMask[0] -= 4;
NewMask[1] -= 4;
HighV = V1;
LowV = V2;
} else {
// We have a mixture of V1 and V2 in both low and high lanes. Rather than
// trying to place elements directly, just blend them and set up the final
// shuffle to place them.
// The first two blend mask elements are for V1, the second two are for
// V2.
int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
// a blend.
LowV = HighV = V1;
NewMask[0] = Mask[0] < 4 ? 0 : 2;
NewMask[1] = Mask[0] < 4 ? 2 : 0;
NewMask[2] = Mask[2] < 4 ? 1 : 3;
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
/// \brief Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget.hasSSE3()) {
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Otherwise, use a straight shuffle of a single input vector. We pass the
// input vector to both operands to simulate this with a SHUFPS.
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V =
lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
if (!isSingleSHUFPSMask(Mask))
if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
DL, MVT::v4f32, V1, V2, Mask, DAG))
return BlendPerm;
}
// Use low/high mov instructions.
if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
return V;
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
/// \brief Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We coerce the shuffle pattern to be compatible with UNPCK instructions
// but we aren't actually going to use the UNPCK instruction because doing
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1}))
Mask = UnpackLoMask;
else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3}))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (!isSingleSHUFPSMask(Mask)) {
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
Mask, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
DL, MVT::v4i32, V1, V2, Mask, DAG))
return Unpack;
}
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
return DAG.getBitcast(MVT::v4i32, ShufPS);
}
/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
/// shuffle lowering, and the most complex part.
///
/// The lowering strategy is to try to form pairs of input lanes which are
/// targeted at the same half of the final vector, and then use a dword shuffle
/// to place them onto the right half, and finally unpack the paired lanes into
/// their final position.
///
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
///
/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL =
std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
int NumHToL = LoInputs.size() - NumLToL;
int NumLToH =
std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
int NumHToH = HiInputs.size() - NumLToH;
MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
// If we are splatting two values from one half - one to each half, then
// we can shuffle that half so each is splatted to a dword, then splat those
// to their respective halves.
auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
int DOffset) {
int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
return DAG.getBitcast(VT, V);
};
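// For example (illustrative): Mask = [0,0,0,0,2,2,2,2] has NumLToL == 1 and
// NumLToH == 1, so SplatHalfs(0, 2, PSHUFLW, 0) first forms the low half
// [w0,w0,w2,w2] via PSHUFLW{0,0,2,2}, then PSHUFD{0,0,1,1} splats dword 0
// across the low half and dword 1 across the high half, yielding
// [w0,w0,w0,w0,w2,w2,w2,w2].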
if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <= 2 inputs to each half from each half. Once there, we can fall through
// to the generic code below. For example:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
//
// However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
// and an existing 2-into-2 on the other half. In this case we may have to
// pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
// 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
// Fortunately, we don't have to handle anything but a 2-into-2 pattern
// because any other situation (including a 3-into-1 or 1-into-3 in the other
// half than the one we target for fixing) will be fixed when we re-enter this
// path. We will also combine any resulting sequence of PSHUFD instructions
// into a single instruction. Here is an example of the tricky case:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
//
// This now has a 1-into-3 in the high half! Instead, we do two shuffles:
//
// Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
//
// Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
//
// The result is fine to be handled by the generic logic.
auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
int AOffset, int BOffset) {
assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
"Must call this with A having 3 or 1 inputs from the A half.");
assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
"Must call this with B having 1 or 3 inputs from the B half.");
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
bool ThreeAInputs = AToAInputs.size() == 3;
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
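// For example (illustrative): with AOffset == 0 and TripleInputs == {0,1,3},
// TripleInputSum == 6 and the inputs sum to 4, so the remaining slot is
// word 2 and TripleDWord == 1.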
int ADWord, BDWord;
int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
// We use xor with one to compute the adjacent DWord to whichever one the
// OneInput is in.
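// For example (illustrative): if OneInput is word 5, it lives in dword 2,
// so OneInputDWord becomes 3.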
OneInputDWord = (OneInput / 2) ^ 1;
// Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
// and BToA inputs. If there is also such a problem with the BToB and AToB
// inputs, we don't try to fix it necessarily -- we'll recurse and see it in
// the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
// is essential that we don't *create* a 3<-1 as then we might oscillate.
if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
// Compute how many inputs will be flipped by swapping these DWords. We
// need to balance this to ensure we don't form a 3-1 shuffle in the
// other half.
int NumFlippedAToBInputs =
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
int NumFlippedBToBInputs =
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
if ((NumFlippedAToBInputs == 1 &&
(NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
(NumFlippedBToBInputs == 1 &&
(NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
// We choose whether to fix the A half or B half based on whether that
// half has zero flipped inputs. At zero, we may not be able to fix it
// with that half. We also bias towards fixing the B half because that
// will more commonly be the high half, and we have to bias one way.
auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
ArrayRef<int> Inputs) {
int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
// Determine whether the free index is in the flipped dword or the
// unflipped dword based on where the pinned index is. We use this bit
// in an xor to conditionally select the adjacent dword.
int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
if (IsFixIdxInput == IsFixFreeIdxInput)
FixFreeIdx += 1;
IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
assert(IsFixIdxInput != IsFixFreeIdxInput &&
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
V = DAG.getNode(
FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
for (int &M : Mask)
if (M >= 0 && M == FixIdx)
M = FixFreeIdx;
else if (M >= 0 && M == FixFreeIdx)
M = FixIdx;
};
if (NumFlippedBToBInputs != 0) {
int BPinnedIdx =
BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
}
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
if (M >= 0 && M/2 == ADWord)
M = 2 * BDWord + M % 2;
else if (M >= 0 && M/2 == BDWord)
M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
// each half. That means the inputs can always be grouped into dwords and
// those dwords can then be moved to the correct half with a dword shuffle.
// We use at most one low and one high word shuffle to collect these paired
// inputs into dwords, and finally a dword shuffle to place them.
int PSHUFLMask[4] = {-1, -1, -1, -1};
int PSHUFHMask[4] = {-1, -1, -1, -1};
int PSHUFDMask[4] = {-1, -1, -1, -1};
// First fix the masks for all the inputs that are staying in their
// original halves. This will then dictate the targets of the cross-half
// shuffles.
auto fixInPlaceInputs =
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
if (IncomingInputs.empty()) {
// Just fix all of the in place inputs.
for (int Input : InPlaceInputs) {
SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
PSHUFDMask[Input / 2] = Input / 2;
}
return;
}
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
// Put the second input next to the first so that they are packed into
// a dword. We find the adjacent index by toggling the low bit.
int AdjIndex = InPlaceInputs[0] ^ 1;
SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
// Now gather the cross-half inputs and place them into a free dword of
// their target half.
// FIXME: This operation could almost certainly be simplified dramatically to
// look more like the 3-1 fixing operation.
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
};
auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
int Word) {
int LowWord = Word & ~1;
int HighWord = Word | 1;
return isWordClobbered(SourceHalfMask, LowWord) ||
isWordClobbered(SourceHalfMask, HighWord);
};
if (IncomingInputs.empty())
return;
if (ExistingInputs.empty()) {
// Map any dwords with inputs from them into the right half.
for (int Input : IncomingInputs) {
// If the source half mask maps over the inputs, turn those into
// swaps and use the swapped lane.
if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
Input - SourceOffset;
// We have to swap the uses in our half mask in one sweep.
for (int &M : HalfMask)
if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
} else {
assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
Input - SourceOffset &&
"Previous placement doesn't match!");
}
// Note that this correctly re-maps both when we do a swap and when
// we observe the other side of the swap above. We rely on that to
// avoid swapping the members of the input list directly.
Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
}
// Map the input's dword into the correct half.
if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
else
assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
Input / 2 &&
"Previous placement doesn't match!");
}
// And just directly shift any other-half mask elements to be same-half
// as we will have mirrored the dword containing the element into the
// same position within that half.
for (int &M : HalfMask)
if (M >= SourceOffset && M < SourceOffset + 4) {
M = M - SourceOffset + DestOffset;
assert(M >= 0 && "This should never wrap below zero!");
}
return;
}
// Ensure we have the input in a viable dword of its current half. This
// is particularly tricky because the original position may be clobbered
// by inputs being moved and *staying* in that half.
if (IncomingInputs.size() == 1) {
if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
SourceOffset;
SourceHalfMask[InputFixed - SourceOffset] =
IncomingInputs[0] - SourceOffset;
std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
InputFixed);
IncomingInputs[0] = InputFixed;
}
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
// We have two non-adjacent or clobbered inputs we need to extract from
// the source half. To do this, we need to map them into some adjacent
// dword slot in the source mask.
int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
IncomingInputs[1] - SourceOffset};
// If there is a free slot in the source half mask adjacent to one of
// the inputs, place the other input in it. We use (Index XOR 1) to
// compute an adjacent index.
if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
InputsFixed[1] = InputsFixed[0] ^ 1;
} else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
InputsFixed[0] = InputsFixed[1] ^ 1;
} else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
// The two inputs are in the same DWord but it is clobbered and the
// adjacent DWord isn't used at all. Move both inputs to the free
// slot.
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
} else {
// The only way we hit this point is if there is no clobbering
// (because there are no off-half inputs to this half) and there is no
// free slot adjacent to one of the inputs. In this case, we have to
// swap an input with a non-input.
for (int i = 0; i < 4; ++i)
assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
"We can't handle any clobbers here!");
assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
"Cannot have adjacent inputs here!");
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
// We also have to update the final source mask in this case because
// it may need to undo the above swap.
for (int &M : FinalSourceHalfMask)
if (M == (InputsFixed[0] ^ 1) + SourceOffset)
M = InputsFixed[1] + SourceOffset;
else if (M == InputsFixed[1] + SourceOffset)
M = (InputsFixed[0] ^ 1) + SourceOffset;
InputsFixed[1] = InputsFixed[0] ^ 1;
}
// Point everything at the fixed inputs.
for (int &M : HalfMask)
if (M == IncomingInputs[0])
M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
M = InputsFixed[1] + SourceOffset;
IncomingInputs[0] = InputsFixed[0] + SourceOffset;
IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
}
// Now hoist the DWord down to the right half.
int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
for (int &M : HalfMask)
for (int Input : IncomingInputs)
if (M == Input)
M = FreeDWord * 2 + Input % 2;
};
moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
/*SourceOffset*/ 4, /*DestOffset*/ 0);
moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
/*SourceOffset*/ 0, /*DestOffset*/ 4);
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 &&
"Failed to lift all the high half inputs to the low mask!");
assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
"Failed to lift all the low half inputs to the high mask!");
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
// Do a half shuffle with the high mask after shifting its values down.
for (int &M : HiMask)
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
return V;
}
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
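///
/// For example (illustrative): with VT = v8i16, Size = 8 and Scale = 2, a
/// mask entry Mask[2] = 9 (element 1 of V2) expands to byte indices 2 and 3
/// at byte positions 4 and 5 of V2Mask, while the corresponding V1Mask bytes
/// get 0x80 (PSHUFB's zeroing index). OR-ing the two PSHUFB results then
/// performs the blend.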
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
bool &V2InUse) {
SDValue V1Mask[16];
SDValue V2Mask[16];
V1InUse = false;
V2InUse = false;
int Size = Mask.size();
int Scale = 16 / Size;
for (int i = 0; i < 16; ++i) {
if (Mask[i / Scale] < 0) {
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
const int ZeroMask = 0x80;
int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
: ZeroMask;
int V2Idx = Mask[i / Scale] < Size
? ZeroMask
: (Mask[i / Scale] - Size) * Scale + i % Scale;
if (Zeroable[i / Scale])
V1Idx = V2Idx = ZeroMask;
V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
V1InUse |= (ZeroMask != V1Idx);
V2InUse |= (ZeroMask != V2Idx);
}
}
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, V1),
DAG.getBuildVector(MVT::v16i8, DL, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, V2),
DAG.getBuildVector(MVT::v16i8, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
else
V = V1InUse ? V1 : V2;
// Cast the result back to the correct type.
return DAG.getBitcast(VT, V);
}
/// \brief Generic lowering of 8-lane i16 shuffles.
///
/// This handles both single-input shuffles and combined shuffle/blends with
/// two inputs. The single input shuffles are immediately delegated to
/// a dedicated lowering routine.
///
/// The blends are lowered in one of three fundamental ways. If there are few
/// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
/// of the input is significantly cheaper when lowered as an interleaving of
/// the two inputs, try to interleave them. Otherwise, blend the low and high
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
if (NumV2Inputs == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
Mask, Subtarget, DAG))
return Rotate;
// Make a copy of the mask so it can be modified.
SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1,
MutableMask, Subtarget,
DAG);
}
assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
"All single-input shuffles should be canonicalized to be V1-input "
"shuffles.");
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (SDValue BitBlend =
lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
V2, Mask, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
Mask, DAG);
}
/// \brief Check whether a compaction lowering can be done by dropping even
/// elements and compute how many times even elements must be dropped.
///
/// This handles shuffles which take every Nth element where N is a power of
/// two. Example shuffle masks:
///
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
///
/// Any of these lanes can of course be undef.
///
/// This routine only supports N <= 3.
/// FIXME: Evaluate whether either AVX or AVX-512 has any opportunities here
/// for larger N.
///
/// \returns N above, or the number of times even elements must be dropped if
/// there is such a number. Otherwise returns zero.
static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
bool IsSingleInput) {
// The modulus for the shuffle vector entries is based on whether this is
// a single input or not.
int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
"We should only be called with masks with a power-of-2 size!");
uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
// We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
// and 2^3 simultaneously. This is because we may have ambiguity with
// partially undef inputs.
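// For example (illustrative): for a single-input v16i8 shuffle, ModMask is
// 15; the mask <0,2,4,...,14,0,2,...,14> satisfies Mask[i] == (i << 1) & 15
// in every defined lane, so N = 1 stays viable, while <0,4,8,12,0,4,8,12,...>
// instead matches (i << 2) & 15 and yields N = 2.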
bool ViableForN[3] = {true, true, true};
for (int i = 0, e = Mask.size(); i < e; ++i) {
// Ignore undef lanes, we'll optimistically collapse them to the pattern we
// want.
if (Mask[i] < 0)
continue;
bool IsAnyViable = false;
for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
if (ViableForN[j]) {
uint64_t N = j + 1;
// The shuffle mask must be equal to (i * 2^N) % M.
if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
IsAnyViable = true;
else
ViableForN[j] = false;
}
// Early exit if we exhaust the possible powers of two.
if (!IsAnyViable)
break;
}
for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
if (ViableForN[j])
return j + 1;
// Return 0 as there is no viable power of two.
return 0;
}
/// \brief Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
/// detect any complexity-reducing interleaving. If that doesn't help, it uses
/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to use a zext lowering.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
// For single-input shuffles, there are some nicer lowering tricks we can use.
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
// things significantly. Currently, this means we need to be able to
// express the pre-duplication shuffle as an i16 shuffle.
//
// FIXME: We should check for other patterns which can be widened into an
// i16 shuffle as well.
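// For example (illustrative): <0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7> duplicates
// every byte, so the pre-duplication shuffle is the i16 identity; a single
// UNPCKL of V1 with itself then produces the widened result directly.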
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
for (int i = 0; i < 16; i += 2)
if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
return false;
return true;
};
auto tryToWidenViaDuplication = [&]() -> SDValue {
if (!canWidenViaDuplication(Mask))
return SDValue();
SmallVector<int, 4> LoInputs;
copy_if(Mask, std::back_inserter(LoInputs),
[](int M) { return M >= 0 && M < 8; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
bool TargetLo = LoInputs.size() >= HiInputs.size();
ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
PreDupI16Shuffle[I/2] = I/2;
LaneMap[I] = I;
}
int j = TargetLo ? 0 : 4, je = j + 4;
for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
// Check if j is already a shuffle of this input. This happens when
// there are two adjacent bytes after we move the low one.
if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
// If we haven't yet mapped the input, search for a slot into which
// we can map it.
while (j < je && PreDupI16Shuffle[j] >= 0)
++j;
if (j == je)
// We can't place the inputs into a single half with a simple i16
// shuffle, so bail.
return SDValue();
// Map this input with the i16 shuffle.
PreDupI16Shuffle[j] = MovingInputs[i] / 2;
}
// Update the lane map based on the mapping we ended up with.
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
}
V1 = DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
MVT::v16i8, V1, V1);
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0) {
int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
if (PostDupI16Shuffle[i / 2] < 0)
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entries in the original shuffle!");
}
return DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
};
if (SDValue V = tryToWidenViaDuplication())
return V;
}
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
// lowerings can find an instruction sequence that is faster than a PSHUFB, we
// want to preserve that and we can DAG combine any longer sequences into
// a PSHUFB in the end. But once we start blending from multiple inputs,
// the complexity of DAG combining bad patterns back into PSHUFB is too high,
// and there are *very* few patterns that would actually be faster than the
// PSHUFB approach because of its ability to zero lanes.
//
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
if (Subtarget.hasSSSE3()) {
bool V1InUse = false;
bool V2InUse = false;
SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
// If both V1 and V2 are in use and we can use a direct blend or an unpack,
// do so. This avoids using them to handle blends-with-zero which is
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
// cases. Even though the or may be (very marginally) more efficient, we
// prefer this lowering because there are common cases where part of
// the complexity of the shuffles goes away when we do the final blend as
// an unpack.
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
DL, MVT::v16i8, V1, V2, Mask, DAG))
return Unpack;
}
return PSHUFB;
}
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (SDValue BitBlend =
lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
return BitBlend;
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
// details.
//
// We special case these as they can be particularly efficiently handled with
// the PACKUSWB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
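// For example (illustrative): the two-input mask <0,2,4,...,30> has
// NumEvenDrops == 1; masking each input to 0x00FF per word and issuing a
// single PACKUSWB keeps bytes 0,2,...,14 of V1 followed by those of V2.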
bool IsSingleInput = V2.isUndef();
if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) {
// NumEvenDrops is log2 of the stride between the elements we keep. Another
// way of thinking about it is that we need to drop the even elements this
// many times to get the original input.
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
// We use the mask type to pick which bytes are preserved based on how many
// elements are dropped.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
SDValue ByteClearMask = DAG.getBitcast(
MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
// Now pack things back together.
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2);
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
for (int i = 1; i < NumEvenDrops; ++i) {
Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
return Result;
}
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
Mask, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
// with a pack.
SDValue V = V1;
std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0)
(i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
SDValue VLoHalf, VHiHalf;
// Check if any of the odd lanes in the v16i8 are used. If not, we can mask
// them out and avoid using UNPCK{L,H} to extract the elements of V as
// i16s.
if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
DAG.getConstant(0x00FF, DL, MVT::v8i16));
// This will be a single vector shuffle instead of a blend so nuke VHiHalf.
VHiHalf = DAG.getUNDEF(MVT::v8i16);
// Squash the masks to point directly into VLoHalf.
for (int &M : LoBlendMask)
if (M >= 0)
M /= 2;
for (int &M : HiBlendMask)
if (M >= 0)
M /= 2;
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
VLoHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
VHiHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
/// \brief Dispatching routine to lower various 128-bit x86 vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Unimplemented!");
}
}
/// \brief Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
/// AVX vector shuffle types.
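///
/// For example (illustrative): a v8i32 mask <0,8,1,9,4,12,5,13> splits into
/// a low-half shuffle <0,4,1,5> of (LoV1, LoV2) and a high-half shuffle
/// <0,4,1,5> of (HiV1, HiV2), which are then concatenated.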
static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.getSizeInBits() >= 256 &&
"Only for 256-bit or wider vector shuffles!");
assert(V1.getSimpleValueType() == VT && "Bad operand type!");
assert(V2.getSimpleValueType() == VT && "Bad operand type!");
ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
MVT ScalarVT = VT.getVectorElementType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
// Rather than splitting build-vectors, just build two narrower build
// vectors. This helps shuffling with splats and zeros.
auto SplitVector = [&](SDValue V) {
V = peekThroughBitcasts(V);
MVT OrigVT = V.getSimpleValueType();
int OrigNumElements = OrigVT.getVectorNumElements();
int OrigSplitNumElements = OrigNumElements / 2;
MVT OrigScalarVT = OrigVT.getVectorElementType();
MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
SDValue LoV, HiV;
auto *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV) {
LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
DAG.getIntPtrConstant(0, DL));
HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
DAG.getIntPtrConstant(OrigSplitNumElements, DL));
} else {
SmallVector<SDValue, 16> LoOps, HiOps;
for (int i = 0; i < OrigSplitNumElements; ++i) {
LoOps.push_back(BV->getOperand(i));
HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
}
LoV = DAG.getBuildVector(OrigSplitVT, DL, LoOps);
HiV = DAG.getBuildVector(OrigSplitVT, DL, HiOps);
}
return std::make_pair(DAG.getBitcast(SplitVT, LoV),
DAG.getBitcast(SplitVT, HiV));
};
SDValue LoV1, HiV1, LoV2, HiV2;
std::tie(LoV1, HiV1) = SplitVector(V1);
std::tie(LoV2, HiV2) = SplitVector(V2);
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
for (int i = 0; i < SplitNumElements; ++i) {
int M = HalfMask[i];
if (M >= NumElements) {
if (M >= NumElements + SplitNumElements)
UseHiV2 = true;
else
UseLoV2 = true;
V2BlendMask[i] = M - NumElements;
BlendMask[i] = SplitNumElements + i;
} else if (M >= 0) {
if (M >= SplitNumElements)
UseHiV1 = true;
else
UseLoV1 = true;
V1BlendMask[i] = M;
BlendMask[i] = i;
}
}
// Because the lowering happens after all combining takes place, we need to
// manually combine these blend masks as much as possible so that we create
// a minimal number of high-level vector shuffle nodes.
// First try just blending the halves of V1 or V2.
if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
return DAG.getUNDEF(SplitVT);
if (!UseLoV2 && !UseHiV2)
return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
if (!UseLoV1 && !UseHiV1)
return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
SDValue V1Blend, V2Blend;
if (UseLoV1 && UseHiV1) {
V1Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
} else {
// We only use half of V1 so map the usage down into the final blend mask.
V1Blend = UseLoV1 ? LoV1 : HiV1;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
}
if (UseLoV2 && UseHiV2) {
V2Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
} else {
// We only use half of V2 so map the usage down into the final blend mask.
V2Blend = UseLoV2 ? LoV2 : HiV2;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= SplitNumElements)
BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
}
return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
};
SDValue Lo = HalfBlend(LoMask);
SDValue Hi = HalfBlend(HiMask);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
/// \brief Either split a vector in halves or decompose the shuffles and the
/// blend.
///
/// This is provided as a good fallback for many lowerings of non-single-input
/// shuffles with more than one 128-bit lane. In those cases, we want to select
/// between splitting the shuffle into 128-bit components and stitching those
/// back together vs. extracting the single-input shuffles and blending those
/// results.
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
// prefer that lowering. This is especially important because broadcasts can
// often fold with memory operands.
auto DoBothBroadcast = [&] {
int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
for (int M : Mask)
if (M >= Size) {
if (V2BroadcastIdx < 0)
V2BroadcastIdx = M - Size;
else if (M - Size != V2BroadcastIdx)
return false;
} else if (M >= 0) {
if (V1BroadcastIdx < 0)
V1BroadcastIdx = M;
else if (M != V1BroadcastIdx)
return false;
}
return true;
};
if (DoBothBroadcast())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
// unusually few instructions.
int LaneCount = VT.getSizeInBits() / 128;
int LaneSize = Size / LaneCount;
SmallBitVector LaneInputs[2];
LaneInputs[0].resize(LaneCount, false);
LaneInputs[1].resize(LaneCount, false);
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
}
/// \brief Lower a vector shuffle crossing multiple 128-bit lanes as
/// a permutation and blend of those lanes.
///
/// This essentially blends the out-of-lane inputs to each lane into the lane
/// from a permuted copy of the vector. This lowering strategy results in four
/// instructions in the worst case for a single-input cross-lane shuffle, fewer
/// than any other fully general cross-lane shuffle strategy I'm aware of.
/// Special cases for each particular shuffle pattern should be handled
/// prior to trying this lowering.
static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int Size = Mask.size();
int LaneSize = Size / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool LaneCrossing[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
if (!LaneCrossing[0] || !LaneCrossing[1])
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
SmallVector<int, 32> FlippedBlendMask(Size);
for (int i = 0; i < Size; ++i)
FlippedBlendMask[i] =
Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
? Mask[i]
: Mask[i] % LaneSize +
(i / LaneSize) * LaneSize + Size);
// Flip the vector, and blend the results which should now be in-lane. The
// VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and
// 5 for the high source. The value 3 selects the high half of source 2 and
// the value 2 selects the low half of source 2. We only use source 2 to
// allow folding it into a memory operand.
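// For example (illustrative): for a v8f32 mask <4,5,2,3,0,1,6,7>, Flipped
// becomes [V1.hi, V1.lo] and FlippedBlendMask is <8,9,2,3,12,13,6,7>, so the
// final in-lane blend reads the crossed elements from Flipped.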
unsigned PERMMask = 3 | 2 << 4;
SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
V1, DAG.getConstant(PERMMask, DL, MVT::i8));
return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
}
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
// If either input operand is a zero vector, use VPERM2X128 because its mask
// allows us to replace the zero input with an implicit zero.
if (!IsV1Zero && !IsV2Zero) {
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
// With AVX2, use VPERMQ/VPERMPD to allow memory folding.
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
// With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
// this will likely become vinsertf128 which can't fold a 256-bit memop.
if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
}
}
// Otherwise form a 128-bit permutation. After accounting for undefs,
// convert the 64-bit shuffle mask selection values into 128-bit
// selection bits by dividing the indexes by 2 and shifting into positions
// defined by a vperm2*128 instruction's immediate control byte.
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
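// For example (illustrative): WidenedMask = {1, 2} selects the high half of
// V1 for the low destination half and the low half of V2 for the high half,
// giving PermMask = 1 | (2 << 4) = 0x21.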
int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
unsigned PermMask = MaskLO | (MaskHI << 4);
// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
// Shuffle mask values >= 4 are selecting elements of V2.
// Adjust each half of the permute mask by clearing the half that was
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, DL, MVT::i8));
}
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
/// This will only succeed when fixing the 128-bit lanes yields a single-input
/// non-lane-crossing shuffle with a repeating shuffle mask in each 128-bit
/// lane. This handles many cases where we can quickly blend away
/// the lane crosses early and then use simpler shuffles within each lane.
///
/// FIXME: It might be worthwhile at some point to support this without
/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
/// in x86 only floating point has interesting non-repeating shuffles, and even
/// those are still *marginally* more expensive.
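///
/// For example (illustrative): the v8i32 mask <5,4,7,6,9,8,11,10> selects
/// lane 1 of V1 and lane 0 of V2 with the repeating in-lane mask <1,0,3,2>,
/// so we first shuffle the operands as v4i64 with lane mask <2,3,4,5> and
/// then apply the non-crossing mask <1,0,3,2,5,4,7,6>.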
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
int NumLanes = Size / LaneSize;
assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
// See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
// check whether the in-128-bit lane shuffles share a repeating pattern.
SmallVector<int, 4> Lanes((unsigned)NumLanes, -1);
SmallVector<int, 4> InLaneMask((unsigned)LaneSize, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
int j = i / LaneSize;
if (Lanes[j] < 0) {
// First entry we've seen for this lane.
Lanes[j] = Mask[i] / LaneSize;
} else if (Lanes[j] != Mask[i] / LaneSize) {
// This doesn't match the lane selected previously!
return SDValue();
}
// Check that within each lane we have a consistent shuffle mask.
int k = i % LaneSize;
if (InLaneMask[k] < 0) {
InLaneMask[k] = Mask[i] % LaneSize;
} else if (InLaneMask[k] != Mask[i] % LaneSize) {
// This doesn't fit a repeating in-lane mask.
return SDValue();
}
}
// First shuffle the lanes into place.
MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
VT.getSizeInBits() / 64);
SmallVector<int, 8> LaneMask((unsigned)NumLanes * 2, -1);
for (int i = 0; i < NumLanes; ++i)
if (Lanes[i] >= 0) {
LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
}
V1 = DAG.getBitcast(LaneVT, V1);
V2 = DAG.getBitcast(LaneVT, V2);
SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
// Cast it back to the type we actually want.
LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
// Now do a simple shuffle that isn't lane crossing.
SmallVector<int, 8> NewMask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
"Must not introduce lane crosses at this point!");
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(VT.is256BitVector() && "Expected 256-bit vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
if (!UndefLower && !UndefUpper)
return SDValue();
// Upper half is undef and lower half is whole upper subvector.
// e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
if (UndefUpper &&
isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(HalfNumElts, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(0, DL));
}
// Lower half is undef and upper half is whole lower subvector.
// e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (UndefLower &&
isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(HalfNumElts, DL));
}
// If the shuffle only uses two of the four halves of the input operands,
// then extract them and perform the 'half' shuffle at half width.
// e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
int HalfIdx1 = -1, HalfIdx2 = -1;
SmallVector<int, 8> HalfMask(HalfNumElts);
unsigned Offset = UndefLower ? HalfNumElts : 0;
for (unsigned i = 0; i != HalfNumElts; ++i) {
int M = Mask[i + Offset];
if (M < 0) {
HalfMask[i] = M;
continue;
}
// Determine which of the 4 half vectors this element is from.
// i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
int HalfIdx = M / HalfNumElts;
// Determine the element index into its half vector source.
int HalfElt = M % HalfNumElts;
// We can shuffle with up to 2 half vectors; set the new 'half'
// shuffle mask accordingly.
if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
HalfMask[i] = HalfElt;
HalfIdx1 = HalfIdx;
continue;
}
if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
HalfMask[i] = HalfElt + HalfNumElts;
HalfIdx2 = HalfIdx;
continue;
}
// Too many half vectors referenced.
return SDValue();
}
assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
// Only shuffle the halves of the inputs when useful.
int NumLowerHalves =
(HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
int NumUpperHalves =
(HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
// uuuuXXXX - don't extract uppers just to insert again.
if (UndefLower && NumUpperHalves != 0)
return SDValue();
// XXXXuuuu - don't extract both uppers, instead shuffle and then extract.
if (UndefUpper && NumUpperHalves == 2)
return SDValue();
// AVX2 - XXXXuuuu - always extract lowers.
if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
// AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
// AVX2 supports variable 32-bit element cross-lane shuffles.
if (VT == MVT::v8f32 || VT == MVT::v8i32) {
// XXXXuuuu - don't extract lowers and uppers.
if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0)
return SDValue();
}
}
auto GetHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
SDValue V = (HalfIdx < 2 ? V1 : V2);
HalfIdx = (HalfIdx % 2) * HalfNumElts;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
DAG.getIntPtrConstant(HalfIdx, DL));
};
SDValue Half1 = GetHalfVector(HalfIdx1);
SDValue Half2 = GetHalfVector(HalfIdx2);
SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
DAG.getIntPtrConstant(Offset, DL));
}
/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
/// This returns true if the elements from a particular input are already in the
/// slot required by the given mask and require no permutation.
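///
/// For example (illustrative): for the v4 mask <0, 5, 2, 7>, input 0
/// (elements 0 and 2) and input 1 (elements 5 and 7) are both in place.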
static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i)
return false;
return true;
}
/// Handle the case where the shuffle sources are coming from the same 128-bit lane and
/// every lane can be represented as the same repeating mask - allowing us to
/// shuffle the sources with the repeating shuffle and then permute the result
/// to the destination lanes.
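///
/// For example (illustrative, assuming AVX2): the single-input v8f32 mask
/// <6,7,6,7,2,3,2,3> becomes the in-lane repeating shuffle
/// <2,3,u,u,6,7,u,u> followed by the 64-bit sub-lane permute
/// <4,5,4,5,0,1,0,1> (a VPERMPD-style pattern).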
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = NumElts / NumLanes;
// On AVX2 we may be able to just shuffle the lowest elements and then
// broadcast the result.
if (Subtarget.hasAVX2()) {
for (unsigned BroadcastSize : {16, 32, 64}) {
if (BroadcastSize <= VT.getScalarSizeInBits())
continue;
int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits();
// Attempt to match a repeating pattern every NumBroadcastElts,
// accounting for UNDEFs while only referencing the lowest 128-bit
// lane of the inputs.
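// For example (illustrative): the v8i32 mask <1,0,1,0,1,0,1,0> with a
// 64-bit BroadcastSize first swaps the low two elements in place and then
// broadcasts the low 64 bits to each pair of elements.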
auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) {
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j) {
int M = Mask[i + j];
if (M < 0)
continue;
int &R = RepeatMask[j];
if (0 != ((M % NumElts) / NumLaneElts))
return false;
if (0 <= R && R != M)
return false;
R = M;
}
return true;
};
SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1);
if (!FindRepeatingBroadcastMask(RepeatMask))
continue;
// Shuffle the (lowest) repeated elements in place for broadcast.
SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);
// Shuffle the actual broadcast.
SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;
return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
}
// Bail if the shuffle mask doesn't cross 128-bit lanes.
if (!is128BitLaneCrossingShuffleMask(VT, Mask))
return SDValue();
// Bail if we already have a repeated lane shuffle mask.
SmallVector<int, 8> RepeatedShuffleMask;
if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask))
return SDValue();
// On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
// (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes.
int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1;
int NumSubLanes = NumLanes * SubLaneScale;
int NumSubLaneElts = NumLaneElts / SubLaneScale;
// Check that all the sources are coming from the same lane and see if we can
// form a repeating shuffle mask (local to each sub-lane). At the same time,
// determine the source sub-lane for each destination sub-lane.
int TopSrcSubLane = -1;
SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
SmallVector<int, 8> RepeatedSubLaneMasks[2] = {
SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef),
SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)};
for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
// Extract the sub-lane mask, check that it all comes from the same lane
// and normalize the mask entries to come from the first lane.
int SrcLane = -1;
SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
if (M < 0)
continue;
int Lane = (M % NumElts) / NumLaneElts;
if ((0 <= SrcLane) && (SrcLane != Lane))
return SDValue();
SrcLane = Lane;
int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
SubLaneMask[Elt] = LocalM;
}
// Whole sub-lane is UNDEF.
if (SrcLane < 0)
continue;
// Attempt to match against the candidate repeated sub-lane masks.
for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
for (int i = 0; i != NumSubLaneElts; ++i) {
if (M1[i] < 0 || M2[i] < 0)
continue;
if (M1[i] != M2[i])
return false;
}
return true;
};
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
continue;
// Merge the sub-lane mask into the matching repeated sub-lane mask.
for (int i = 0; i != NumSubLaneElts; ++i) {
int M = SubLaneMask[i];
if (M < 0)
continue;
assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
"Unexpected mask element");
RepeatedSubLaneMask[i] = M;
}
// Track the topmost source sub-lane - by setting the remaining to UNDEF
// we can greatly simplify shuffle matching.
int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
break;
}
// Bail if we failed to find a matching repeated sub-lane mask.
if (Dst2SrcSubLanes[DstSubLane] < 0)
return SDValue();
}
assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
"Unexpected source lane");
// Create a repeating shuffle mask for the entire vector.
SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
int Lane = SubLane / SubLaneScale;
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = RepeatedSubLaneMask[Elt];
if (M < 0)
continue;
int Idx = (SubLane * NumSubLaneElts) + Elt;
RepeatedMask[Idx] = M + (Lane * NumLaneElts);
}
}
SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
// Shuffle each source sub-lane to its destination.
SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumSubLaneElts) {
int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
if (SrcSubLane < 0)
continue;
for (int j = 0; j != NumSubLaneElts; ++j)
SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
}
return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
SubLaneMask);
}
static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
unsigned &ShuffleImm,
ArrayRef<int> Mask) {
int NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
// Mask for V4F64: 0/1, 4/5, 2/3, 6/7, ..
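// For example (illustrative): the v4f64 mask <1, 5, 2, 6> picks
// {V1[1], V2[1], V1[2], V2[2]} and encodes as ShuffleImm = 0b0011.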
ShuffleImm = 0;
bool ShufpdMask = true;
bool CommutableMask = true;
for (int i = 0; i < NumElts; ++i) {
if (Mask[i] == SM_SentinelUndef)
continue;
if (Mask[i] < 0)
return false;
int Val = (i & 6) + NumElts * (i & 1);
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
if (Mask[i] < Val || Mask[i] > Val + 1)
ShufpdMask = false;
if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
CommutableMask = false;
ShuffleImm |= (Mask[i] % 2) << i;
}
if (ShufpdMask)
return true;
if (CommutableMask) {
std::swap(V1, V2);
return true;
}
return false;
}
static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
"Unexpected data type for VSHUFPD");
unsigned Immediate = 0;
if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
return SDValue();
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
DAG.getConstant(Immediate, DL, MVT::i8));
}
static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
}
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG);
}
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
return Op;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
// we will be able to shuffle the other input across lanes in a single
// instruction, so skip this pattern.
if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
isShuffleMaskInputInPlace(1, Mask))))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
Mask, DAG);
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 4-lane 64-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling.
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower-latency instructions that will operate on both lanes.
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
DAG.getBitcast(MVT::v8i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
// AVX2 provides a direct instruction for permuting a single input across
// lanes.
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
}
// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
if (!isShuffleMaskInputInPlace(0, Mask) &&
!isShuffleMaskInputInPlace(1, Mask))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 &&
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have a single-input shuffle with different shuffle patterns in the
// two 128-bit lanes, use the variable-mask form of VPERMILPS.
if (V2.isUndef()) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
DAG);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Result;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
// For non-AVX512, if the mask is equivalent to an in-lane 16-bit unpack
// then try to split, since the split form uses vpunpcklwd and vpunpckhwd
// and is more efficient than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG);
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling.
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// For non-AVX512, if the mask is equivalent to an in-lane 16-bit unpack
// then try to split, since the split form uses vpunpcklwd and vpunpckhwd
// and is more efficient than vblend.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
if (SDValue V =
lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the two 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
}
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return V;
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
if (V2.isUndef()) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
}
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v8i32, ShufPS);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
Mask, DAG);
}
/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling.
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG);
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
}
}
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512BWVL can lower to VPERMW.
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 32-lane 8-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling.
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i8
// element types.
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG);
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
}
/// \brief High-level routine to lower various 256-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 256-bit x86 vector
/// shuffle or splits it into two 128-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = VT.getVectorNumElements();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// There is a really nice hard cut-over between AVX1 and AVX2 that means we
// can check for those subtargets here and avoid much of the subtarget
// querying in the per-vector-type lowering routines. With AVX1 we have
// essentially *zero* ability to manipulate a 256-bit vector with integer
// types. Since we'll use floating point types there eventually, just
// immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget.hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32) {
// No floating point type is available; if we can't use the bit operations
// for masking/blending then decompose into 128-bit vectors.
if (SDValue V =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return V;
if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
VT.getVectorNumElements());
V1 = DAG.getBitcast(FpVT, V1);
V2 = DAG.getBitcast(FpVT, V2);
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i8:
return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");
}
}
/// \brief Try to lower a vector shuffle as a shuffle of 128-bit subvectors.
static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
assert(VT.getScalarSizeInBits() == 64 &&
"Unexpected element type size for 128bit shuffle.");
// Handling a 256-bit vector requires VLX, and the function
// lowerV2X128VectorShuffle() is most probably a better solution for that case.
assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
assert(WidenedMask.size() == 4);
// See if this is an insertion of the lower 128 bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
// Make sure all V1 subvectors are in place.
if (WidenedMask[i] < 4) {
if (WidenedMask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128 bits.
if (V2Index >= 0 || WidenedMask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure all elements come from the same Op.
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
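// For example (illustrative): the widened mask <0, 2, 4, 6> keeps
// Ops = {V1, V2} and encodes PermMask = 0x88, selecting the even 128-bit
// chunks of each source.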
PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
DAG.getConstant(PermMask, DL, MVT::i8));
}
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (V2.isUndef()) {
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
}
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
}
if (SDValue Shuf128 =
lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Shuf128;
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Op;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Shuf128 =
lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Shuf128;
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower-latency instructions that will operate on all four
// 128-bit lanes.
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
DAG.getBitcast(MVT::v16i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
SmallVector<int, 4> Repeated256Mask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the four 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than using a permv shuffle.
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);
}
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (V2.isUndef()) {
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.
return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
}
}
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// VBMI can use VPERMV/VPERMV3 byte shuffles.
if (Subtarget.hasVBMI())
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
/// \brief High-level routine to lower various 512-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = Mask.size();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Check for being able to broadcast a single element.
if (SDValue Broadcast =
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16f32:
return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i64:
return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:
return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:
return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:
return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 512-bit x86 vector type!");
}
}
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
// vector, shuffle, and then truncate it back.
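// For example (illustrative): a v16i1 shuffle is performed by sign-extending
// both operands to v16i32, shuffling at that width, and then truncating (or
// converting back to a mask) to v16i1.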
static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Expected a vector of i1 elements");
case MVT::v2i1:
ExtVT = MVT::v2i64;
break;
case MVT::v4i1:
ExtVT = MVT::v4i32;
break;
case MVT::v8i1:
ExtVT = MVT::v8i64; // Take a 512-bit type; more shuffles are available on KNL
break;
case MVT::v16i1:
ExtVT = MVT::v16i32;
break;
case MVT::v32i1:
ExtVT = MVT::v32i16;
break;
case MVT::v64i1:
ExtVT = MVT::v64i8;
break;
}
if (ISD::isBuildVectorAllZeros(V1.getNode()))
V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V1.getNode()))
V1 = getOnesVector(ExtVT, DAG, DL);
else
V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
if (V2.isUndef())
V2 = DAG.getUNDEF(ExtVT);
else if (ISD::isBuildVectorAllZeros(V2.getNode()))
V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V2.getNode()))
V2 = getOnesVector(ExtVT, DAG, DL);
else
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
// Since the i1 elements were sign-extended we can use X86ISD::CVT2MASK.
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, Shuffle);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
/// Helper function that returns true if the shuffle mask should be
/// commuted to improve canonicalization.
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
continue;
else if (M < NumElements)
++NumV1Elements;
else
++NumV2Elements;
// Commute the shuffle as needed such that more elements come from V1 than
// V2. This allows us to match the shuffle pattern strictly on how many
// elements come from V1 without handling the symmetric cases.
if (NumV2Elements > NumV1Elements)
return true;
assert(NumV1Elements > 0 && "No V1 indices");
if (NumV2Elements == 0)
return false;
// When the number of V1 and V2 elements are the same, try to minimize the
// number of uses of V2 in the low half of the vector. When that is tied,
// ensure that the sum of indices for V1 is equal to or lower than the sum of
// indices for V2. When those are equal, try to ensure that the number of odd
// indices for V1 is lower than the number of odd indices for V2.
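// For example, with Mask = <4, 5, 2, 3> on a 4-element shuffle, V1 and V2
// each contribute two elements, but both V2 elements land in the low half,
// so we commute to the equivalent Mask = <0, 1, 6, 7> with V1 and V2
// swapped.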
if (NumV1Elements == NumV2Elements) {
int LowV1Elements = 0, LowV2Elements = 0;
for (int M : Mask.slice(0, NumElements / 2))
if (M >= NumElements)
++LowV2Elements;
else if (M >= 0)
++LowV1Elements;
if (LowV2Elements > LowV1Elements)
return true;
if (LowV2Elements == LowV1Elements) {
int SumV1Indices = 0, SumV2Indices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
SumV2Indices += i;
else if (Mask[i] >= 0)
SumV1Indices += i;
if (SumV2Indices < SumV1Indices)
return true;
if (SumV2Indices == SumV1Indices) {
int NumV1OddIndices = 0, NumV2OddIndices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
NumV2OddIndices += i % 2;
else if (Mask[i] >= 0)
NumV1OddIndices += i % 2;
if (NumV2OddIndices < NumV1OddIndices)
return true;
}
}
}
return false;
}
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
/// above in helper routines. The canonicalization attempts to widen shuffles
/// to involve fewer lanes of wider elements, consolidate symmetric patterns
/// so that only one of the two inputs needs to be tested, etc.
static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
int NumElements = VT.getVectorNumElements();
SDLoc DL(Op);
bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);
assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
"Can't lower MMX shuffles");
bool V1IsUndef = V1.isUndef();
bool V2IsUndef = V2.isUndef();
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
// When we create a shuffle node we put the UNDEF node in the second operand,
// but in some cases the first operand may be transformed to UNDEF.
// In this case we should just commute the node.
if (V1IsUndef)
return DAG.getCommutedVectorShuffle(*SVOp);
// Check for non-undef masks pointing at an undef vector and make the masks
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
if (V2IsUndef)
for (int M : Mask)
if (M >= NumElements) {
SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
for (int &M : NewMask)
if (M >= NumElements)
M = -1;
return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
}
// Check for illegal shuffle mask element index values.
int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2);
(void)MaskUpperLimit;
assert(llvm::all_of(Mask,
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
"Out of bounds shuffle index");
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
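// For example, a v4i32 shuffle with Mask = <2, 3, 6, 7> moves aligned pairs
// of elements, so it can be widened to a v2i64 shuffle with Mask = <1, 3>,
// for which cheaper lowerings are available.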
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(Mask, WidenedMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
V1 = DAG.getBitcast(NewVT, V1);
V2 = DAG.getBitcast(NewVT, V2);
return DAG.getBitcast(
VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask));
}
}
// Commute the shuffle if it will improve canonicalization.
if (canonicalizeShuffleMaskWithCommute(Mask))
return DAG.getCommutedVectorShuffle(*SVOp);
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (VT.is256BitVector())
return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (VT.is512BitVector())
return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (Is1BitVector)
return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
auto *CondBV = cast<BuildVectorSDNode>(Cond);
// Only non-legal VSELECTs reach this lowering; convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
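// For example, a v4i32 vselect with Cond = <-1, 0, -1, 0> takes elements 0
// and 2 from LHS and elements 1 and 3 from RHS, i.e. the blend shuffle
// Mask = <0, 5, 2, 7>.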
SmallVector<int, 32> Mask;
for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
SDValue CondElt = CondBV->getOperand(i);
Mask.push_back(
isa<ConstantSDNode>(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0)
: -1);
}
return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
return SDValue();
// If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
return Op;
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
// Variable blends are only legal from SSE4.1 onward.
if (!Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
SDValue Cond = Op.getOperand(0);
// The vNi1 condition case should be handled above as it can be trivially
// lowered.
assert(Cond.getValueType().getScalarSizeInBits() ==
VT.getScalarSizeInBits() &&
"Should have a size-matched integer condition!");
// Build a mask by testing the condition against itself (tests for zero).
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
}
// Only some types will be legal on some subtargets. If we can emit a legal
// VSELECT-matching blend, return Op, but if we need to expand, return
// a null value.
switch (VT.SimpleTy) {
default:
// Most of the vector types have blends past SSE4.1.
return Op;
case MVT::v32i8:
// The byte blends for AVX vectors were introduced only in AVX2.
if (Subtarget.hasAVX2())
return Op;
return SDValue();
case MVT::v8i16:
case MVT::v16i16:
// AVX-512 BWI and VLX features support VSELECT with i16 elements.
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return Op;
// FIXME: We should custom lower this by fixing the condition and using i8
// blends.
return SDValue();
}
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
// the case of a store, it's not worth it if the index is a constant 0,
// because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
isNullConstant(Op.getOperand(1))) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
Op.getOperand(1));
return DAG.getBitcast(MVT::f32, Extract);
}
if (VT == MVT::i32 || VT == MVT::i64) {
// EXTRACTPS/PEXTRQ work with a constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
return SDValue();
}
/// Extract one bit from a mask vector, such as v16i1 or v8i1.
/// AVX-512 feature.
SDValue
X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
"Unexpected vector type in ExtractBitFromMaskVector");
// A variable index can't be handled in mask registers,
// so extend the vector to VR512/VR128.
if (!isa<ConstantSDNode>(Idx)) {
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit gets better performance on KNL
// than extending to 128/256-bit.
unsigned VecSize = (NumElts <= 4 ? 128 : 512);
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
ExtVT.getVectorElementType(), Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
(VecVT.getVectorNumElements() < 8)) {
// Use kshiftlw/rw instruction.
VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
DAG.getUNDEF(VecVT),
Vec,
DAG.getIntPtrConstant(0, dl));
}
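// For example, extracting bit 3 of a v16i1 mask: kshiftl by 15 - 3 = 12
// moves bit 3 into the top position and zeroes everything below it, then
// kshiftr by 15 moves it down to bit 0 with the rest of the register clear.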
unsigned MaxShift = VecVT.getVectorNumElements() - 1;
if (MaxShift - IdxVal)
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(MaxShift - IdxVal, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxShift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG);
if (!isa<ConstantSDNode>(Idx)) {
// It's more profitable to go through memory (1 cycle throughput)
// than to use a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
// The IACA tool was used to get these performance estimates
// (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
//
// example : extractelement <16 x i8> %a, i32 %i
//
// Block Throughput: 3.00 Cycles
// Throughput Bottleneck: Port5
//
// | Num Of | Ports pressure in cycles | |
// | Uops | 0 - DV | 5 | 6 | 7 | |
// ---------------------------------------------
// | 1 | | 1.0 | | | CP | vmovd xmm1, edi
// | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1
// | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0
// Total Num Of Uops: 4
//
//
// Block Throughput: 1.00 Cycles
// Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
//
// | | Ports pressure in cycles | |
// |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
// ---------------------------------------------------------
// |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
// |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
// |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
// Total Num Of Uops: 4
return SDValue();
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
// Get the 128-bit vector.
Vec = extract128BitVector(Vec, IdxVal, DAG, dl);
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
// this can be done with a mask.
IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
MVT VT = Op.getSimpleValueType();
if (VT.getSizeInBits() == 16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
!(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
// Transform it so it matches pextrw, which produces a 32-bit result.
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (Subtarget.hasSSE41())
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
// TODO: We only extract a single element from v16i8, so we can probably
// afford to be more aggressive here before using the default approach of
// spilling to stack.
if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
// Extract either the lowest i32 or any i16, and extract the sub-byte.
int DWordIdx = IdxVal / 4;
if (DWordIdx == 0) {
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
DAG.getIntPtrConstant(DWordIdx, dl));
int ShiftVal = (IdxVal % 4) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
DAG.getConstant(ShiftVal, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
int WordIdx = IdxVal / 2;
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
DAG.getBitcast(MVT::v8i16, Vec),
DAG.getIntPtrConstant(WordIdx, dl));
int ShiftVal = (IdxVal % 2) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
DAG.getConstant(ShiftVal, dl, MVT::i16));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
if (IdxVal == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note: if the lower 64 bits of the result of the UNPCKHPD are then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
return SDValue();
}
/// Insert one bit into a mask vector, such as v16i1 or v8i1.
/// AVX-512 feature.
SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
MVT VecVT = Vec.getSimpleValueType();
if (!isa<ConstantSDNode>(Idx)) {
// Non-constant index: extend the source and destination,
// insert the element, and then truncate the result.
MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
unsigned NumElems = VecVT.getVectorNumElements();
if (Vec.isUndef()) {
if (IdxVal)
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
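// The two special cases below avoid a full shuffle by combining kshift
// pairs with an OR: one pair isolates the new bit at its target position,
// the other pair clears the corresponding bit in the source vector.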
// Insertion of one bit into the first position.
if (IdxVal == 0) {
// Clean top bits of vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(NumElems - 1, dl, MVT::i8));
EltInVec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, EltInVec,
DAG.getConstant(NumElems - 1, dl, MVT::i8));
// Clean the first bit in source vector.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into the last position.
if (IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Use a shuffle to insert the element.
SmallVector<int, 64> MaskVec(NumElems);
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = (i == IdxVal) ? NumElems : i;
return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (!isa<ConstantSDNode>(N2))
return SDValue();
auto *N2C = cast<ConstantSDNode>(N2);
unsigned IdxVal = N2C->getZExtValue();
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
// If we are inserting an element, see if we can do this more efficiently with
// a blend shuffle with a rematerializable vector than a costly integer
// insertion.
if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
16 <= EltVT.getSizeInBits()) {
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
: DAG.getConstant(-1, dl, VT);
return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
// With a 256-bit vector, we can insert into the zero element efficiently
// using a blend if we have AVX or AVX2 and the right data type.
if (VT.is256BitVector() && IdxVal == 0) {
// TODO: It is worthwhile to cast integer to floating point and back
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
(Subtarget.hasAVX2() && EltVT == MVT::i32)) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
N2 = DAG.getIntPtrConstant(1, dl);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
}
}
// Get the desired 128-bit vector chunk.
SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(NumEltsIn128));
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));
// Insert the changed part back into the bigger vector
return insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument. SSE41 is required for pinsrb.
if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
unsigned Opc;
if (VT == MVT::v8i16) {
assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
Opc = X86ISD::PINSRW;
} else {
assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
Opc = X86ISD::PINSRB;
}
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
if (Subtarget.hasSSE41()) {
if (EltVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into
// these bits. For example (insert (extract, 3), 2) could be matched by
// putting the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
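// For example, inserting into element 2 with no zeroing uses the immediate
// (2 << 4) = 0x20: source select 0 in bits [7:6], destination select 2 in
// bits [5:4], and an empty zero mask in bits [3:0].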
bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
// will always have equal or better performance than insertps.
// But if optimizing for size and there's a load folding opportunity,
// generate insertps because blendps does not have a 32-bit memory
// operand form.
N2 = DAG.getIntPtrConstant(1, dl);
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
}
N2 = DAG.getIntPtrConstant(IdxVal << 4, dl);
// Create this as a scalar-to-vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
}
// PINSR* works with a constant index.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return Op;
}
return SDValue();
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
// It's always cheaper to replace a xor+movd with xorps, and this simplifies
// further combines.
if (X86::isZeroNode(Op.getOperand(0)))
return getZeroVector(OpVT, Subtarget, DAG, dl);
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits() / 128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
assert(OpVT.is128BitVector() && "Expected an SSE type!");
// Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
if (OpVT == MVT::v4i32)
return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
return DAG.getBitcast(
OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
// a simple subregister reference or explicit instructions to grab the
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX() && "EXTRACT_SUBVECTOR requires AVX");
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ResVT = Op.getSimpleValueType();
// When v1i1 is legal, a scalarization of a vselect with a vXi1 Cond
// would result in: v1i1 = extract_subvector(vXi1, idx).
// Lower these into extract_vector_elt which is already selectable.
if (ResVT == MVT::v1i1) {
assert(Subtarget.hasAVX512() &&
"Boolean EXTRACT_SUBVECTOR requires AVX512");
MVT EltVT = ResVT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LegalVT =
(TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
}
assert((In.getSimpleValueType().is256BitVector() ||
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
// If the input is a buildvector, just emit a smaller one.
unsigned ElemsPerChunk = ResVT.getVectorNumElements();
if (In.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(
ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
// Everything else is legal.
return Op;
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
return insert1BitVector(Op, DAG, Subtarget);
}
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
return X86ISD::WrapperRIP;
return X86ISD::Wrapper;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetConstantPool(
CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
return Result;
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
SDLoc DL(Op);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isPositionIndependent() && !Subtarget.is64Bit()) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
// For symbols that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddress node.
unsigned char OpFlags =
Subtarget.classifyBlockAddressReference();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
return Result;
}
SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
const SDLoc &dl, int64_t Offset,
SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags = Subtarget.classifyGlobalReference(GV);
CodeModel::Model M = DAG.getTarget().getCodeModel();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
Offset = 0;
} else {
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
Result = DAG.getNode(getGlobalWrapperKind(GV), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
DAG.getConstant(Offset, dl, PtrVT));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
}
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
MFI.setAdjustsStack(true);
MFI.setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
SDValue InFlag;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool is64Bit) {
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (is64Bit) {
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
// of Base.
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit, bool isPIC) {
SDLoc dl(GA);
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
MachinePointerInfo(Ptr));
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// initialexec.
unsigned WrapperKind = X86ISD::Wrapper;
if (model == TLSModel::LocalExec) {
OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
} else if (model == TLSModel::InitialExec) {
if (is64Bit) {
OperandFlags = X86II::MO_GOTTPOFF;
WrapperKind = X86ISD::WrapperRIP;
} else {
OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
}
} else {
llvm_unreachable("Unexpected model");
}
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
SDValue TGA =
DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool PositionIndependent = isPositionIndependent();
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget.is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
Subtarget.is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
if (Subtarget.isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
SDLoc DL(Op);
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
// Lowering the machine ISD will make sure everything is in the right
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true),
Chain.getValue(1), DL);
// TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
if (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium() ||
Subtarget.isTargetWindowsGNU()) {
// Just use the implicit TLS architecture.
// We need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index] ; Load index (from C runtime)
// mov rcx, qword [rdx+rcx*8]
// mov eax, .tls$:tlsvar
// [rax+rcx] contains the address
// Windows 64bit: gs:0x58
// Windows 32bit: fs:__tls_array
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
SDValue TlsArray = Subtarget.is64Bit()
? DAG.getIntPtrConstant(0x58, dl)
: (Subtarget.isTargetWindowsGNU()
? DAG.getIntPtrConstant(0x2C, dl)
: DAG.getExternalSymbol("_tls_array", PtrVT));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr));
SDValue res;
if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
res = ThreadPointer;
} else {
// Load the _tls_index variable
SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
if (Subtarget.is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32);
else
IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo());
auto &DL = DAG.getDataLayout();
SDValue Scale =
DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT);
IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
}
res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());
// Get the offset of the start of the .tls section.
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
}
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
// X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
// generic ISD nodes don't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, MVT::i8))
: DAG.getConstant(0, dl, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
// If the shift amount is greater than or equal to the width of a part we
// can't rely on the results of shld/shrd. Insert a test and select the
// appropriate values for large shift amounts.
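// For example, for a 32-bit-part SHL_PARTS with ShAmt = 40, the CMOVs
// below select Hi = ShOpLo << (40 & 31) = ShOpLo << 8 and Lo = 0, since a
// single shld cannot produce the correct result for such amounts.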
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
AndNode, DAG.getConstant(0, dl, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
} else {
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
}
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
}
return SDValue();
}
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; return the operand so the caller accepts it as
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Op;
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
Subtarget.is64Bit()) {
return Op;
}
SDValue ValueToStore = Op.getOperand(0);
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
!Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
SDLoc DL(Op);
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
unsigned ByteSize = SrcVT.getSizeInBits()/8;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
MachineMemOperand *MMO;
if (FI) {
int SSFI = FI->getIndex();
MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
}
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
Tys, Ops, SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueSizeInBits()/8;
int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, Op.getValueType(), MMO);
Result = DAG.getLoad(
Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
}
return Result;
}
/// 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
// This algorithm is not obvious. Here is what we're trying to output:
/*
movq %rax, %xmm0
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
haddpd %xmm0, %xmm0
#else
pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
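// The trick: punpckldq interleaves the two 32-bit halves of the input with
// the exponent words 0x43300000 and 0x45300000, producing the doubles
//   d0 = 2^52 + lo   and   d1 = 2^84 + hi * 2^32
// (both exact, since each half fits in the 52-bit mantissa). Subtracting
// c1 = { 2^52, 2^84 } leaves { lo, hi * 2^32 }, and the horizontal add
// combines them into the final value.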
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
SDValue CLod1 =
DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (Subtarget.hasSSE3()) {
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
}
/// 32-bit unsigned integer to float expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
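// The bias exploits the f64 layout: OR-ing the zero-extended 32-bit value
// into the mantissa of 2^52 yields exactly 2^52 + x, so subtracting the
// bias 2^52 (bits 0x4330000000000000) recovers x as an exact double.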
// Load the 32-bit value into an XMM register.
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
Op.getOperand(0));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Load),
DAG.getIntPtrConstant(0, dl));
// Or the load with the bias.
SDValue Or = DAG.getNode(
ISD::OR, dl, MVT::v2i64,
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
MVT DestVT = Op.getSimpleValueType();
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
DAG.getIntPtrConstant(0, dl));
if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
// Handle final rounding.
return Sub;
}
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget, SDLoc &DL) {
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
SDValue N0 = Op.getOperand(0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
// Legalize to v4i32 type.
N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
if (Subtarget.hasAVX512())
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
// Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
// but using v2i32 to v2f64 with X86ISD::CVTSI2P.
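// Writing the input as v = hi * 2^16 + lo with 16-bit halves, both halves
// convert exactly via the signed CVTSI2P (each is below 2^16, hence
// non-negative), and fHI * 65536.0 + fLO reassembles v in f64 without
// rounding.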
SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
// Two to the power of half-word-size.
SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64);
// Clear the upper half of LO and shift out the lower half of HI.
SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);
SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);
// Add the two halves.
return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// The algorithm is the following:
// #ifdef __SSE4_1__
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
// #else
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
// #endif
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
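// For example, v = 0x80000001 gives lo = 2^23 + 1 and hi = 2^39 + 2^31 as
// floats, so fhi = hi - (2^39 + 2^23) = 2^31 - 2^23 and the final FADD
// computes (2^23 + 1) + (2^31 - 2^23) = 2^31 + 1, correctly rounded to the
// nearest float.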
// We shouldn't use it when unsafe-fp-math is enabled though: we might later
// reassociate the two FADDs, and if we do that, the algorithm fails
// spectacularly (PR24512).
// FIXME: If we ever have some kind of Machine FMF, this should be marked
// as non-fast and always be enabled. Why isn't SDAG FMF enough? Because
// there's also the MachineCombiner reassociations happening on Machine IR.
if (DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
SDLoc DL(Op);
SDValue V = Op->getOperand(0);
MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something other than the supported type, e.g., to v4f64,
// abort early.
if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
"Unsupported custom type");
// In the #ifdef/#else code, we have in common:
// - The vector of constants:
// -- 0x4b000000
// -- 0x53000000
// - A shift:
// -- v >> 16
// Create the splat vector for 0x4b000000.
SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT);
// Create the splat vector for 0x53000000.
SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT);
// Create the right shift.
SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT);
SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
SDValue Low, High;
if (Subtarget.hasSSE41()) {
MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
}
// Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
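// (0xD3000080 encodes -(2^39 + 2^23) in IEEE single precision:
//  sign = 1, biased exponent = 0xA6 = 127 + 39, mantissa = 0x80,
//  i.e. -(1 + 2^-16) * 2^39 = -(2^39 + 2^23).)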
SDValue VecCstFAdd = DAG.getConstantFP(
APFloat(APFloat::IEEEsingle(), APInt(32, 0xD3000080)), DL, VecFloatVT);
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
// TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
case MVT::v4i8:
case MVT::v4i16:
case MVT::v8i8:
case MVT::v8i16: {
MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
case MVT::v2i32:
return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
case MVT::v16i8:
case MVT::v16i16:
assert(Subtarget.hasAVX512());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
// using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
return Op;
}
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
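// For illustration: the fudge constant below, 0x5F800000, is 2^64 as an
// IEEE single. If the i64 input was negative, its unsigned value equals
// the signed value plus 2^64, so we add 2^64 to the FILD result;
// otherwise we add 0.0 (the other half of the constant-pool pair).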
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64-bit pair (0, FF) in the constant pool, with FF in the low bits.
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
}
// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
// just return an <SDValue(), SDValue()> pair.
// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
// to i16, i32 or i64, and we lower it to a legal sequence.
// If lowered to the final integer result we return a <result, SDValue()> pair.
// Otherwise we lower it to a sequence ending with a FIST, return a
// <FIST, StackSlot> pair, and the caller is responsible for loading
// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
// f16 must be promoted before using the lowering in this routine.
// fp128 does not use this lowering.
return std::make_pair(SDValue(), SDValue());
}
// If using FIST to compute an unsigned i64, we'll need some fixup
// to handle values above the maximum signed i64. A FIST is always
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned &&
DstTy == MVT::i64 &&
(!Subtarget.is64Bit() ||
!isScalarFPTypeInSSEReg(TheVT));
if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
if (Subtarget.is64Bit() &&
DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
if (UnsignedFixup) {
//
// Conversion to unsigned i64 is implemented with a select,
// depending on whether the source value fits in the range
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
// Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
// FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
// to XOR'ing the high 32 bits with Adjust.
//
// Being a power of 2, Thresh is exactly representable in all FP formats.
// For X87 we'd like to use the smallest FP type for this constant, but
// for DAG type consistency we have to match the FP operand type.
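// Worked example (illustrative): for Value = 2^63 + 42.0, the compare
// against Thresh = 2^63 fails, so Adjust = 0x80000000 and the FIST
// converts Value - 2^63 = 42.0, storing 42. XOR'ing the high 32 bits
// of the result with Adjust then yields 0x8000000000000000 + 42, the
// correct unsigned value.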
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
assert(Status == APFloat::opOK && !LosesInfo &&
"FP conversion should have been exact");
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
SDValue Cmp = DAG.getSetCC(DL,
getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0x80000000, DL, MVT::i32));
SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
}
// FIXME: This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the call stack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI));
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
if (UnsignedFixup) {
// Insert the FIST, load its result as two i32's,
// and XOR the high i32 with Adjust.
SDValue FistOps[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
FistOps, DstTy, MMO);
SDValue Low32 =
DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo());
SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL);
SDValue High32 =
DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo());
High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
if (Subtarget.is64Bit()) {
// Join High32 and Low32 into a 64-bit result.
// (High32 << 32) | Low32
Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
DAG.getConstant(32, DL, MVT::i8));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
return std::make_pair(Result, SDValue());
}
SDValue ResultOps[] = { Low32, High32 };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
: DAG.getMergeValues(ResultOps, DL);
return std::make_pair(pair, SDValue());
} else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
}
}
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In);
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
// Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
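// For example (illustrative), zero-extending v8i16 on AVX1:
//   OpLo = vpunpcklwd(In, Zero) interleaves the four low words with
//   zeros, and OpHi = vpunpckhwd(In, Zero) does the same for the high
//   words; each half, bitcast to v4i32, holds zero-extended elements.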
if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getBitcast(HVT, OpLo);
OpHi = DAG.getBitcast(HVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI()))
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
if (InVT.getVectorElementType() != MVT::i1)
return SDValue();
// Extend VT if the target type is a 256- or 128-bit vector and VLX is not supported.
MVT ExtVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX())
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue One =
DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero);
if (VT == ExtVT)
return SelectedVal;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
return SDValue();
}
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements());
return SDValue();
}
/// Helper to recursively truncate vector elements in half with PACKSS.
/// It makes use of the fact that vector comparison results will be all-zeros
/// or all-ones to use (vXi8 PACKSS(vYi16, vYi16)) instead of matching types.
/// AVX2 (Int256) sub-targets require extra shuffling as the PACKSS operates
/// within each 128-bit lane.
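/// For example (illustrative): a v16i16 comparison result, where every
/// element is 0 or -1, packs to v16i8 with a single PACKSS, since
/// signed saturation maps 0 -> 0 and -1 -> -1 exactly.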
static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In,
const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Requires SSE2 but AVX512 has fast truncate.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
EVT SrcVT = In.getValueType();
// No truncation required, we might get here due to recursive calls.
if (SrcVT == DstVT)
return In;
// We only support vector truncation to 128 bits or greater from a
// 256-bit or greater source.
if ((DstVT.getSizeInBits() % 128) != 0)
return SDValue();
if ((SrcVT.getSizeInBits() % 256) != 0)
return SDValue();
unsigned NumElems = SrcVT.getVectorNumElements();
assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
assert(SrcVT.getSizeInBits() > DstVT.getSizeInBits() && "Illegal truncation");
EVT PackedSVT =
EVT::getIntegerVT(*DAG.getContext(), SrcVT.getScalarSizeInBits() / 2);
// Extract lower/upper subvectors.
unsigned NumSubElts = NumElems / 2;
unsigned SrcSizeInBits = SrcVT.getSizeInBits();
SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
// 256-bit -> 128-bit truncate - PACKSS lower/upper 128-bit subvectors.
if (SrcVT.is256BitVector()) {
Lo = DAG.getBitcast(MVT::v8i16, Lo);
Hi = DAG.getBitcast(MVT::v8i16, Hi);
SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, Lo, Hi);
return DAG.getBitcast(DstVT, Res);
}
// AVX2: 512-bit -> 256-bit truncate - PACKSS lower/upper 256-bit subvectors.
// AVX2: 512-bit -> 128-bit truncate - PACKSS(PACKSS, PACKSS).
if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
Lo = DAG.getBitcast(MVT::v16i16, Lo);
Hi = DAG.getBitcast(MVT::v16i16, Hi);
SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v32i8, Lo, Hi);
// 256-bit PACKSS(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
// so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
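// Illustration: labelling the packed low/high 128-bit lane of ARGn as
// LOn/HIn, PACKSS produces the quadwords in the order (LO0, LO1, HI0,
// HI1), and the {0, 2, 1, 3} shuffle reorders them to (LO0, HI0, LO1,
// HI1), restoring the original element order.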
Res = DAG.getBitcast(MVT::v4i64, Res);
Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
if (DstVT.is256BitVector())
return DAG.getBitcast(DstVT, Res);
// If 512-bit -> 128-bit, truncate another stage.
EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
Res = DAG.getBitcast(PackedVT, Res);
return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
}
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcVT.getSizeInBits() >= 512 && "Expected 512-bit vector or greater");
EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems / 2);
Lo = truncateVectorCompareWithPACKSS(PackedVT, Lo, DL, DAG, Subtarget);
Hi = truncateVectorCompareWithPACKSS(PackedVT, Hi, DL, DAG, Subtarget);
PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
}
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");
// Shift LSB to MSB and use VPMOVB/W2M or TESTD/Q.
unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// Legal; will be selected to VPMOVB2M/VPMOVW2M.
// Shifting packed bytes is not supported natively, so bitcast to words.
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
ShiftNode = DAG.getBitcast(InVT, ShiftNode);
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
}
// Use TESTD/Q on the vector extended to packed dwords/qwords.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
"Unexpected vector type.");
unsigned NumElts = InVT.getVectorNumElements();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
ShiftInx = InVT.getScalarSizeInBits() - 1;
}
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
if (InVT.getSizeInBits() >= 32)
return SDValue();
In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
// word to byte only under BWI
if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
// Truncate with PACKSS if we are truncating a vector zero/all-bits result.
if (InVT.getScalarSizeInBits() == DAG.ComputeNumSignBits(In))
if (SDValue V = truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget))
return V;
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2, DL));
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
In = DAG.getBitcast(MVT::v32i8, In);
// The PSHUFB mask:
static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1,
16, 17, 20, 21, 24, 25, 28, 29,
-1, -1, -1, -1, -1, -1, -1, -1 };
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask2[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(VT, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4, DL));
OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
return DAG.getBitcast(MVT::v8i16, res);
}
// Handle truncation of V256 to V128 using shuffles.
if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
assert(Subtarget.hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
In = DAG.getBitcast(NVT, In);
SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0, DL));
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32)));
}
return SDValue();
}
assert(!VT.isVector());
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
IsSigned, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode())
return Op;
if (StackSlot.getNode())
// Load the result.
return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
// The node is the result.
return FIST;
}
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
In, DAG.getUNDEF(SVT)));
}
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
"Wrong opcode for lowering FABS or FNEG.");
bool IsFABS = (Op.getOpcode() == ISD::FABS);
// If this is a FABS and it has an FNEG user, bail out to fold the combination
// into an FNABS. We'll lower the FABS after that if it is still in use.
if (IsFABS)
for (SDNode *User : Op->uses())
if (User->getOpcode() == ISD::FNEG)
return Op;
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
MVT LogicVT;
MVT EltVT;
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
} else if (IsF128) {
// SSE instructions are used for optimized f128 logical operations.
LogicVT = MVT::f128;
EltVT = VT;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
EltVT = VT;
}
unsigned EltBits = EltVT.getSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
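// (Illustratively, for f32: FABS ANDs with 0x7FFFFFFF to clear the sign
//  bit, FNEG XORs with 0x80000000 to flip it, and the folded FNABS ORs
//  with 0x80000000 to set it.)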
APInt MaskElt =
IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble() :
(IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
unsigned LogicOp =
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
// and extract the scalar result back out.
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Mag = Op.getOperand(0);
SDValue Sign = Op.getOperand(1);
SDLoc dl(Op);
// If the sign operand is smaller, extend it first.
MVT VT = Op.getSimpleValueType();
if (Sign.getSimpleValueType().bitsLT(VT))
Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);
// And if it is bigger, shrink it first.
if (Sign.getSimpleValueType().bitsGT(VT))
Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl));
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
bool IsF128 = (VT == MVT::f128);
assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
MVT EltVT = VT.getScalarType();
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble()
: (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
// TODO: This isn't necessary. If we used scalar types, we might avoid some
// unnecessary splats, but we might miss load folding opportunities. Should
// this decision be based on OptimizeForSize?
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
// Next, clear the sign bit from the first operand (magnitude).
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
} else {
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (IsFakeVector)
Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
}
// OR the magnitude value with the sign bit.
SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT OpVT = N0.getSimpleValueType();
assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
"Unexpected type for FGETSIGN");
// Lower ISD::FGETSIGN to (AND (X86ISD::MOVMSK ...) 1).
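// (MOVMSK copies the sign bit of each vector lane into the low bits of
//  a GPR; AND'ing with 1 then keeps only the sign of lane 0, which is
//  where SCALAR_TO_VECTOR placed N0.)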
MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0);
Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res);
Res = DAG.getZExtOrTrunc(Res, dl, VT);
Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT));
return Res;
}
// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget.hasSSE41())
return SDValue();
if (!Op->hasOneUse())
return SDValue();
SDNode *N = Op.getNode();
SDLoc DL(N);
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, unsigned> VecInMap;
SmallVector<SDValue, 8> VecIns;
EVT VT = MVT::Other;
// Recognize a special case where a vector is cast into a wide integer to
// test all 0s.
Opnds.push_back(N->getOperand(0));
Opnds.push_back(N->getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
// BFS traverse all OR'd operands.
if (I->getOpcode() == ISD::OR) {
Opnds.push_back(I->getOperand(0));
Opnds.push_back(I->getOperand(1));
// Re-evaluate the number of nodes to be traversed.
e += 2; // 2 more nodes (LHS and RHS) are pushed.
continue;
}
// Quit if this is not an EXTRACT_VECTOR_ELT.
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// Quit if the index is not a constant.
SDValue Idx = I->getOperand(1);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
SDValue ExtractedFromVec = I->getOperand(0);
DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
if (M == VecInMap.end()) {
VT = ExtractedFromVec.getValueType();
// Quit if not 128/256-bit vector.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
// Quit if not the same type.
if (VecInMap.begin() != VecInMap.end() &&
VT != VecInMap.begin()->first.getValueType())
return SDValue();
M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
VecIns.push_back(ExtractedFromVec);
}
M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
}
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Not extracted from 128-/256-bit vector.");
unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
for (DenseMap<SDValue, unsigned>::const_iterator
I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
// Quit if not all elements are used.
if (I->second != FullMask)
return SDValue();
}
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
// If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is only
// 1 node left, i.e. the final OR'd value of all vectors.
SDValue LHS = VecIns[Slot];
SDValue RHS = VecIns[Slot + 1];
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
/// \brief return true if \c Op has a use that doesn't just read flags.
static bool hasNonFlagsUse(SDValue Op) {
for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
++UI) {
SDNode *User = *UI;
unsigned UOpNo = UI.getOperandNo();
if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
// Look past the truncate.
UOpNo = User->use_begin().getOperandNo();
User = *User->use_begin();
}
if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
!(User->getOpcode() == ISD::SELECT && UOpNo == 0))
return true;
}
return false;
}
// Emit KTEST instruction for bit vectors on AVX-512
static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Op.getOpcode() == ISD::BITCAST) {
auto hasKTEST = [&](MVT VT) {
unsigned SizeInBits = VT.getSizeInBits();
return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
(Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
};
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType().getSimpleVT();
if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
hasKTEST(Op0VT))
return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
}
return SDValue();
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1) {
SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
DAG.getConstant(0, dl, MVT::i8));
}
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO: {
// Check if we really need to set the Overflow flag.
// If NoSignedWrap is present, it is not actually needed.
switch (Op->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SHL:
if (Op.getNode()->getFlags().hasNoSignedWrap())
break;
LLVM_FALLTHROUGH;
default:
NeedOF = true;
break;
}
break;
}
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
unsigned Opcode = 0;
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
// and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
SDValue Arith = Op->getOperand(0);
// Both the trunc and the arithmetic op need to have one user each.
if (Arith->hasOneUse())
switch (Arith.getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
NeedTruncation = true;
ArithOp = Arith;
}
}
}
// Sometimes flags can be set either with an AND or with an SRL/SHL
// instruction. The SRL/SHL variant should be preferred for masks longer than this
// number of bits.
const int ShiftToAndMaxMaskWidth = 32;
const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
// in a match is a store, isel doesn't know how to remap non-chain non-flag
// uses of other nodes in the match, such as the ADD in this case. This
// leads to the ADD being left around and reselected, with the result being
// two adds in the output. Alas, even if none of our users are stores, that
// doesn't prove we're O.K. Ergo, if we have any parents that aren't
// CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
UI->getOpcode() != ISD::SETCC &&
UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
// An add of one will be selected as an INC.
if (C->isOne() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
if (C->isAllOnesValue() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
}
}
// Otherwise use a regular EFLAGS-setting add.
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
case ISD::SHL:
case ISD::SRL:
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
if (ZeroCheck && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
unsigned ShAmt = Op->getConstantOperandVal(1);
if (ShAmt >= BitWidth) // Avoid undefined shifts.
break;
APInt Mask = ArithOp.getOpcode() == ISD::SRL
? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
: APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
break;
Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
DAG.getConstant(Mask, dl, VT));
}
break;
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better. However, AND should be
// preferred if the instruction can be combined into ANDN.
if (!hasNonFlagsUse(Op)) {
SDValue Op0 = ArithOp->getOperand(0);
SDValue Op1 = ArithOp->getOperand(1);
EVT VT = ArithOp.getValueType();
bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
// If we cannot select an ANDN instruction, check if we can replace
// AND+IMM64 with a shift before giving up. This is possible for masks
// like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
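// For example (illustrative): (x & 0xFF00000000000000) == 0 can be
// tested as (x >> 56) == 0, and (x & 0x00FFFFFFFFFFFFFF) == 0 as
// (x << 8) == 0; the shift sets ZF exactly when the masked value is
// zero, avoiding the 8-byte immediate.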
if (!isProperAndn) {
if (!ZeroCheck)
break;
assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
auto *CN = dyn_cast<ConstantSDNode>(Op1);
if (!CN)
break;
const APInt &Mask = CN->getAPIntValue();
if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
break; // Prefer TEST instruction.
unsigned BitWidth = Mask.getBitWidth();
unsigned LeadingOnes = Mask.countLeadingOnes();
unsigned TrailingZeros = Mask.countTrailingZeros();
if (LeadingOnes + TrailingZeros == BitWidth) {
assert(TrailingZeros < VT.getSizeInBits() &&
"Shift amount should be less than the type width");
MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
break;
}
unsigned LeadingZeros = Mask.countLeadingZeros();
unsigned TrailingOnes = Mask.countTrailingOnes();
if (LeadingZeros + TrailingOnes == BitWidth) {
assert(LeadingZeros < VT.getSizeInBits() &&
"Shift amount should be less than the type width");
MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
break;
}
break;
}
}
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
// Due to the ISEL shortcoming noted above, be conservative if this op is
// likely to be selected as part of a load-modify-store instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
if (!NeedTruncation && ZeroCheck) {
if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
return EFLAGS;
}
Opcode = X86ISD::OR;
break;
}
}
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
return SDValue(Op.getNode(), 1);
default:
default_case:
break;
}
// If we found that truncation is beneficial, perform the truncation and
// update 'Op'.
if (NeedTruncation) {
EVT VT = Op.getValueType();
SDValue WideVal = Op->getOperand(0);
EVT WideVT = WideVal.getValueType();
unsigned ConvertedOp = 0;
// Use a target machine opcode to prevent further DAGCombine
// optimizations that may separate the arithmetic operations
// from the setcc node.
switch (WideVal.getOpcode()) {
default: break;
case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
case ISD::AND: ConvertedOp = X86ISD::AND; break;
case ISD::OR: ConvertedOp = X86ISD::OR; break;
case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
}
if (ConvertedOp) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
}
}
}
if (Opcode == 0) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);
SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesWith(Op, New);
return SDValue(New.getNode(), 1);
}
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (isNullConstant(Op1))
return EmitTest(Op0, X86CC, dl, DAG);
assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
"Unexpected comparison operation for MVT::i1 operands");
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to i32 if it is a 16-bit operation
// with an immediate. 16-bit immediates are to be avoided.
if ((Op0.getValueType() == MVT::i16 &&
(isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
!DAG.getMachineFunction().getFunction()->optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
}
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
/// Convert a comparison if required by the subtarget.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
if (Subtarget.hasCMov() ||
Cmp.getOpcode() != X86ISD::CMP ||
!Cmp.getOperand(0).getValueType().isFloatingPoint() ||
!Cmp.getOperand(1).getValueType().isFloatingPoint())
return Cmp;
// The instruction selector will select an FUCOM instruction instead of
// FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
// build an SDNode sequence that transfers the result from FPSW into EFLAGS:
// (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
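// Illustratively, the emitted sequence corresponds to:
//   fucom(p)        ; compare, condition codes land in FPSW
//   fnstsw %ax      ; copy FPSW into AX
//   sahf            ; load AH (FPSW bits 8..15) into EFLAGS
// which is what the SRL-by-8 and truncate model here.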
SDLoc dl(Cmp);
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
if (VT.isVector())
return Subtarget.hasFastVectorFSQRT();
return Subtarget.hasFastScalarFSQRT();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v8f32 && Subtarget.hasAVX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
UseOneConstNR = false;
return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}
return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
EVT VT = Op.getValueType();
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// reciprocal estimate with refinement on x86 prior to FMA requires
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v8f32 && Subtarget.hasAVX())) {
// Enable estimate codegen with 1 refinement step for vector division.
// Scalar division estimates are disabled because they break too much
// real-world code. These defaults are intended to match GCC behavior.
if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
return SDValue();
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}
return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
/// Helper for creating a X86ISD::SETCC node.
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
SelectionDAG &DAG) {
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
}
/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
/// according to equal/not-equal condition code \p CC.
static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
// If Src is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reasons.
if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
// See if we can use the 32-bit instruction instead of the 64-bit one for a
// shorter encoding. Since the former takes the modulo 32 of BitNo and the
// latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
// known to be zero.
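// (Illustration: "bt %rax, $n" tests bit n % 64 while "bt %eax, $n"
//  tests bit n % 32, so truncating the source to i32 is only safe when
//  bit 5 of BitNo is known zero, as the MaskedValueIsZero check below
//  verifies.)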
if (Src.getValueType() == MVT::i64 &&
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return getSETCC(Cond, BT, dl , DAG);
}
/// Result of 'and' is compared against zero. Change to a BT node if possible.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
if (isOneConstant(Op0.getOperand(0))) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
KnownBits Known;
DAG.computeKnownBits(Op0, Known);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
LHS = Op1;
RHS = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
// Use BT if the immediate can't be encoded in a TEST instruction.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
}
}
if (LHS.getNode())
return getBitTestCondition(LHS, RHS, CC, dl, DAG);
return SDValue();
}
// Convert (truncate (srl X, N) to i1) to (bt X, N)
static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
"Expected TRUNCATE to i1 node");
if (Op.getOperand(0).getOpcode() != ISD::SRL)
return SDValue();
SDValue ShiftRight = Op.getOperand(0);
return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1),
CC, dl, DAG);
}
/// Result of 'and' or 'trunc to i1' is compared against zero.
/// Change to a BT node if possible.
SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (Op.getOpcode() == ISD::AND)
return LowerAndToBT(Op, CC, dl, DAG);
if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
return LowerTruncateToBT(Op, CC, dl, DAG);
return SDValue();
}
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
SDValue &Op1) {
unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
// 0 - EQ
// 1 - LT
// 2 - LE
// 3 - UNORD
// 4 - NEQ
// 5 - NLT
// 6 - NLE
// 7 - ORD
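// (Values 0-7 map directly to SSE compare predicates; SETUEQ/SETONE
//  below return 8, which has no single predicate and is split into two
//  compares by the caller.)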
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ:
case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
return SSECC;
}
/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
/// concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
SDValue CC = Op.getOperand(2);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected type for boolean compare operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
DAG.getConstant(-1, dl, VT));
SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1,
DAG.getConstant(-1, dl, VT));
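// With i1 elements, signed and unsigned orders both reduce to the boolean
// order false < true, so e.g. (x > y) holds exactly when x = 1 and y = 0,
// which is x & ~y.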
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETEQ:
// (x == y) -> ~(x ^ y)
return DAG.getNode(ISD::XOR, dl, VT,
DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
DAG.getConstant(-1, dl, VT));
case ISD::SETNE:
// (x != y) -> (x ^ y)
return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
case ISD::SETUGT:
case ISD::SETGT:
// (x > y) -> (x & ~y)
return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1);
case ISD::SETULT:
case ISD::SETLT:
// (x < y) -> (~x & y)
return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1);
case ISD::SETULE:
case ISD::SETLE:
// (x <= y) -> (~x | y)
return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1);
case ISD::SETUGE:
case ISD::SETGE:
// (x >= y) -> (x | ~y)
return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1);
}
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(VT.getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
unsigned Opc = 0;
bool Unsigned = false;
bool Swap = false;
unsigned SSECC;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: SSECC = 4; break;
case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
case ISD::SETULT: SSECC = 1; Unsigned = true; break;
case ISD::SETUGE: SSECC = 5; Unsigned = true; break; // NLT
case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
case ISD::SETULE: Unsigned = true; LLVM_FALLTHROUGH;
case ISD::SETLE: SSECC = 2; break;
}
if (Swap)
std::swap(Op0, Op1);
if (Opc)
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU : X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
}
/// \brief Try to turn a VSETULT into a VSETULE by modifying its second
/// operand \p Op1. If non-trivial (for example because it's not constant)
/// return an empty value.
static SDValue ChangeVSETULTtoVSETULE(const SDLoc &dl, SDValue Op1,
SelectionDAG &DAG) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
if (!BV)
return SDValue();
MVT VT = Op1.getSimpleValueType();
MVT EVT = VT.getVectorElementType();
unsigned n = VT.getVectorNumElements();
SmallVector<SDValue, 8> ULTOp1;
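// For each lane, x u< C is equivalent to x u<= (C - 1) as long as C != 0,
// so build the decremented constant vector lane by lane.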
for (unsigned i = 0; i < n; ++i) {
ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
return SDValue();
// Avoid underflow.
APInt Val = Elt->getAPIntValue();
if (Val == 0)
return SDValue();
ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
}
return DAG.getBuildVector(VT, dl, ULTOp1);
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
unsigned Opc;
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
Opc = X86ISD::CMPM;
} else {
Opc = X86ISD::CMPP;
// The SSE/AVX packed FP comparison nodes are defined with a
// floating-point vector result that matches the operand type. This allows
// them to work with an SSE1 target (integer vector types are not legal).
VT = Op0.getSimpleValueType();
}
// In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
// emit two comparisons and a logic op to tie them together.
// TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
// available.
SDValue Cmp;
unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
if (SSECC == 8) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
static_cast<unsigned>(ISD::OR);
} else {
assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
static_cast<unsigned>(ISD::AND);
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, dl, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, dl, MVT::i8));
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
// Handle all other FP comparisons here.
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
// result type of SETCC. The bitcast is expected to be optimized away
// during combining/isel.
if (Opc == X86ISD::CMPP)
Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
return Cmp;
}
MVT VTOp0 = Op0.getSimpleValueType();
assert(VTOp0 == Op1.getSimpleValueType() &&
"Expected operands with same type!");
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
"Invalid number of packed elements for source and destination!");
if (VT.is128BitVector() && VTOp0.is256BitVector()) {
// On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
// legalizer to a wider vector type. In the case of 'vsetcc' nodes, the
// legalizer first checks whether the first operand of the setcc has a
// legal type. If so, it promotes the return type to that same type.
// Otherwise, the return type is promoted to the 'next legal type' which,
// for a vector of MVT::i1 is always a 128-bit integer vector type.
//
// We reach this code only if the following two conditions are met:
// 1. Both return type and operand type have been promoted to wider types
// by the type legalizer.
// 2. The original operand type has been promoted to a 256-bit vector.
//
// Note that condition 2 only applies to AVX targets.
SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
return DAG.getZExtOrTrunc(NewOp, dl, VT);
}
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!");
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
// Operands are boolean (vectors of i1)
MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
// The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) {
// In the AVX-512 architecture, setcc returns a mask with i1 elements,
// but there is no compare instruction for i8 and i16 elements in KNL.
// In this case use an SSE compare.
bool UseAVX512Inst =
(OpVT.is512BitVector() ||
OpVT.getScalarSizeInBits() >= 32 ||
(Subtarget.hasBWI() && Subtarget.hasVLX()));
if (UseAVX512Inst)
return LowerIntVSETCC_AVX512(Op, DAG);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// Lower using XOP integer comparisons.
if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
case ISD::SETULE:
case ISD::SETLE: CmpMode = 0x01; break;
case ISD::SETUGT:
case ISD::SETGT: CmpMode = 0x02; break;
case ISD::SETUGE:
case ISD::SETGE: CmpMode = 0x03; break;
case ISD::SETEQ: CmpMode = 0x04; break;
case ISD::SETNE: CmpMode = 0x05; break;
}
// Are we comparing unsigned or signed integers?
unsigned Opc =
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integers, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
: X86ISD::PCMPGT;
bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
Cond == ISD::SETGE || Cond == ISD::SETUGE;
bool Invert = Cond == ISD::SETNE ||
(Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
// If both operands are known non-negative, then an unsigned compare is the
// same as a signed compare and there's no need to flip signbits.
// TODO: We could check for more general simplifications here since we're
// computing known bits.
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
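// Flipping the sign bit maps unsigned order onto signed order:
// x u< y iff (x ^ SignMask) s< (y ^ SignMask).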
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
bool HasMinMax =
(Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
(Subtarget.hasSSE2() && (VET == MVT::i8));
bool MinMax = false;
if (HasMinMax) {
switch (Cond) {
default: break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
if (MinMax)
Swap = Invert = FlipSigns = false;
}
bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
bool Subus = false;
if (!MinMax && HasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
// t = psubus Op0, Op1
// pcmpeq t, <0..0>
switch (Cond) {
default: break;
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a
// setule. With psubus, setule does not require a swap. This is
// beneficial because the constant in the register is no longer
// clobbered as the destination, so it can be hoisted out of a loop.
// Only do this pre-AVX, since the vpcmp* forms are no longer destructive.
if (Subtarget.hasAVX())
break;
if (SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG)) {
Op1 = ULEOp1;
Subus = true; Invert = false; Swap = false;
}
break;
}
// Psubus is better than flip-sign because it requires no inversion.
case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break;
case ISD::SETULE: Subus = true; Invert = false; Swap = false; break;
}
if (Subus) {
Opc = X86ISD::SUBUS;
FlipSigns = false;
}
}
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
} else {
SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
SB = DAG.getBuildVector(MVT::v4i32, dl, {Sign, Zero, Sign, Zero});
}
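// The emulation below compares the high dwords signed and the low dwords
// unsigned, so for an already-signed 64-bit compare only the low dwords
// (elements 0 and 2) get their sign bits flipped; for an unsigned compare
// (FlipSigns) every dword is flipped.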
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64-bit integers.
static const int MaskHi[] = { 1, 1, 3, 3 };
static const int MaskLo[] = { 0, 0, 2, 2 };
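// Each mask broadcasts one dword of every 64-bit element into both dwords
// of its lane, so the following AND/OR combine the partial results
// per 64-bit lane.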
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
if (Subus)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
getZeroVector(VT, Subtarget, DAG, dl));
return Result;
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
// Lower (trunc (X >> N) to i1) to BT(X, N).
if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
return NewSetCC;
}
}
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
if (!Invert)
return Op0;
CCode = X86::GetOppositeBranchCondition(CCode);
SDValue SetCC = getSETCC(CCode, Op0.getOperand(1), dl, DAG);
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
return SetCC;
}
}
if (Op0.getValueType() == MVT::i1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (isOneConstant(Op1)) {
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, dl, MVT::i1), NewCC);
}
if (!isNullConstant(Op1)) {
SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, Op1);
return DAG.getSetCC(dl, VT, Xor, DAG.getConstant(0, dl, MVT::i1), CC);
}
}
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
SDValue SetCC = getSETCC(X86CC, EFLAGS, dl, DAG);
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
return SetCC;
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getConstant(NegOne, DL, CarryVT));
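// Adding all-ones produces a hardware carry-out exactly when the incoming
// carry value is nonzero, which materializes CF for the SBB below.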
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG);
if (Op.getSimpleValueType() == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return SetCC;
}
/// Return true if the opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
return true;
return false;
}
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool AddTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
MVT VT = Op1.getSimpleValueType();
SDValue CC;
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available, or into VBLENDV when AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget.hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget.hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
if (Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS,
DL, VT, Cmp, Op1, Op2);
}
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, DL, MVT::i8));
// If we have AVX, we can use a variable vector select (VBLENDV) instead
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
// If either operand is a constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
// BLENDV was introduced with SSE 4.1, but the 2-register form implicitly
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
if (Subtarget.hasAVX() &&
!isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
VSel, DAG.getIntPtrConstant(0, DL));
}
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
// AVX512 fallback is to lower selects of scalar floats to masked moves.
if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
Op1Scalar = Op1.getOperand(0);
SDValue Op2Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
Op2Scalar = Op2.getOperand(0);
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond,
Op1Scalar, Op2Scalar);
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, newSelect);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
DAG.getIntPtrConstant(0, DL));
}
}
if (VT == MVT::v4i1 || VT == MVT::v2i1) {
SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
// If the condition was updated, it's possible that the operands of the
// select were also updated (for example, EmitTest has a RAUW). Refresh
// the local references to the select operands in case they got stale.
Op1 = Op.getOperand(1);
Op2 = Op.getOperand(2);
}
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
// (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
// (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
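// For the last two rows: when (x & 1) == 1 the negation yields an all-ones
// mask, so (-(x & 1) & z) op y computes z op y; when (x & 1) == 0 the mask
// is zero and the result is simply y, matching both arms of the select.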
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =
cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
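// NEG sets CF exactly when x != 0, and SETCC_CARRY (an sbb reg, reg)
// broadcasts CF into every bit, yielding the desired 0 or -1.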
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
Cmp.getOperand(0).getOpcode() == ISD::AND &&
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
SDValue CmpOp0 = Cmp.getOperand(0);
SDValue Src1, Src2;
// Returns true if Op2 is an XOR or OR operator and one of its operands
// equals Op1, i.e. the pair matches
// (a, a op b) or (b, a op b).
auto isOrXorPattern = [&]() {
if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
(Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
Src1 =
Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
Src2 = Op1;
return true;
}
return false;
};
if (isOrXorPattern()) {
SDValue Neg;
unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
// We need a mask of all zeros or all ones with the same size as the
// other operands.
if (CmpSz > VT.getSizeInBits())
Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
else if (CmpSz < VT.getSizeInBits())
Neg = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
DAG.getConstant(1, DL, VT));
else
Neg = CmpOp0;
SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Neg); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y
}
}
}
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
isOneConstant(Cond.getOperand(1)))
Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Opc == X86ISD::BT) { // FIXME
Cond = Cmp;
AddTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
AddTest = false;
}
if (AddTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
AddTest = false;
}
}
}
if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
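// SETCC_CARRY broadcasts CF into every bit of the result: an unsigned
// a < b leaves CF set after the SUB, so the table above follows directly,
// with the NOT handling the inverted cases.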
if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(isNullConstant(Op1) || isNullConstant(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Cond);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
}
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate,
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
if (Op.getValueType() == MVT::i8 &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Blacklist CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) {
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
MVT VTElt = VT.getVectorElementType();
MVT InVTElt = InVT.getVectorElementType();
SDLoc dl(Op);
// SKX processor
if ((InVTElt == MVT::i1) &&
(((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) ||
((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
unsigned NumElts = VT.getVectorNumElements();
if (VT.is512BitVector() && InVTElt != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
}
if (InVTElt != MVT::i1)
return SDValue();
MVT ExtVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX())
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue V;
if (Subtarget.hasDQI()) {
V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
assert(!VT.is512BitVector() && "Unexpected vector type");
} else {
SDValue NegOne = getOnesVector(ExtVT, DAG, dl);
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero);
if (ExtVT == VT)
return V;
}
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
}
// Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG.
// For sign extend this needs to handle all vector sizes and SSE4.1 and
// non-SSE4.1 targets. For zero extend this should only handle inputs of
// MVT::v64i8 when BWI is not supported, but AVX512 is.
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getSizeInBits() == InVT.getSizeInBits());
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
assert(SVT.getSizeInBits() > InSVT.getSizeInBits());
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
!(VT.is256BitVector() && Subtarget.hasInt256()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
if (VT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128 bits.
int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
}
assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
// SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
// so those cases are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
// instructions still need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
X86ISD::VSEXT : X86ISD::VZEXT;
return DAG.getNode(ExtOpc, dl, VT, In);
}
// We should only get here for sign extend.
assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
"Unexpected opcode!");
// Pre-SSE41 targets unpack the lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
MVT CurrVT = InVT;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
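// For example, going from v16i8 to v8i16: unpacking with undef in the low
// position leaves each source byte in the high byte of a 16-bit lane, and
// the SRAI below then shifts it down arithmetically, replicating the sign
// bit into the low byte.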
while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
Curr = DAG.getBitcast(CurrVT, Curr);
}
SDValue SignExt = Curr;
if (CurrVT != InVT) {
unsigned SignExtShift =
CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
if (CurrVT == VT)
return SignExt;
if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
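// Build a vector of sign words (each i32 shifted right arithmetically by
// 31) and interleave it with the sign-extended values ({0, 4, 1, 5}) to
// form the high halves of the i64 results.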
SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(31, dl, MVT::i8));
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
return DAG.getBitcast(VT, Ext);
}
return SDValue();
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
(VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
// Optimize vectors in AVX mode:
// sign extend v8i16 to v8i32 and v4i32 to v4i64.
//
// Divide the input vector into two parts; for v4i32 the shuffle masks
// will be { 0, 1, -1, -1 } and { 2, 3, -1, -1 }. Then use the vpmovsx
// instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32, and finally
// concat the vectors back to the original VT.
unsigned NumElems = InVT.getVectorNumElements();
SDValue Undef = DAG.getUNDEF(InVT);
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
// Lower a truncating store. We need special lowering for vXi1 vectors.
static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
StoreSDNode *St = cast<StoreSDNode>(StOp.getNode());
SDLoc dl(St);
EVT MemVT = St->getMemoryVT();
assert(St->isTruncatingStore() && "We only custom lower truncating stores.");
assert(MemVT.isVector() && MemVT.getVectorElementType() == MVT::i1 &&
"Expected truncstore of i1 vector");
SDValue Op = St->getValue();
MVT OpVT = Op.getValueType().getSimpleVT();
unsigned NumElts = OpVT.getVectorNumElements();
if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
NumElts == 16) {
// Truncate and store - everything is legal
Op = DAG.getNode(ISD::TRUNCATE, dl, MemVT, Op);
if (MemVT.getSizeInBits() < 8)
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op,
DAG.getIntPtrConstant(0, dl));
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
// A subset: assume that we have only AVX-512F.
if (NumElts <= 8) {
if (NumElts < 8) {
// Extend to an 8-element vector.
MVT ExtVT = MVT::getVectorVT(OpVT.getScalarType(), 8);
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ExtVT,
DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
}
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
// v32i8
assert(OpVT == MVT::v32i8 && "Unexpected operand type");
// Divide the vector into 2 parts and store each part separately
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
DAG.getIntPtrConstant(0, dl));
Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Lo);
SDValue BasePtr = St->getBasePtr();
SDValue StLo = DAG.getStore(St->getChain(), dl, Lo, BasePtr,
St->getMemOperand());
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
DAG.getIntPtrConstant(16, dl));
Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);
SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
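// Each v16i1 half occupies 16 bits, hence the 2-byte offset to the
// second half.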
SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
BasePtrHi, St->getMemOperand());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
}
static SDValue LowerExtended1BitVectorLoad(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
assert(MemVT.isVector() && MemVT.getScalarType() == MVT::i1 &&
"Expected i1 vector load");
unsigned ExtOpcode = Ld->getExtensionType() == ISD::ZEXTLOAD ?
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
MVT VT = Op.getValueType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && NumElts >= 32) ||
(Subtarget.hasDQI() && NumElts < 16) ||
NumElts == 16) {
// Load and extend - everything is legal
if (NumElts < 8) {
SDValue Load = DAG.getLoad(MVT::v8i1, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
SDValue Load = DAG.getLoad(MemVT, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Load);
}
if (NumElts <= 8) {
// A subset: assume that we have only AVX-512F.
unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
SDValue BitVec = DAG.getBitcast(MaskVT, Load);
if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
// We still need to handle v4i1 and v2i1 here.
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
assert(VT == MVT::v32i8 && "Unexpected extload type");
SmallVector<SDValue, 2> Chains;
SDValue BasePtr = Ld->getBasePtr();
SDValue LoadLo = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
Chains.push_back(LoadLo.getValue(1));
SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
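// As in the truncating-store case, each v16i1 half is 16 bits wide, so
// the high half lives at a 2-byte offset.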
SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
BasePtrHi,
Ld->getMemOperand());
Chains.push_back(LoadHi.getValue(1));
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
SDValue Lo = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadLo);
SDValue Hi = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v32i8, Lo, Hi);
}
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector sext loads.");
assert(RegVT.isInteger() &&
"We only custom lower integer vector sext loads.");
// Nothing useful we can do without SSE2 shuffles.
assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
if (MemVT.getScalarType() == MVT::i1)
return LowerExtended1BitVectorLoad(Op, Subtarget, DAG);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
ISD::LoadExtType Ext = Ld->getExtensionType();
assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
&& "Only anyext and sext are currently implemented.");
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
// The only way in which we have a legal 256-bit vector result but not the
// integer 256-bit operations needed to directly lower a sextload is if we
// have AVX1 but not AVX2. In that case, we can always emit a sextload to
// a 128-bit vector and a normal sign_extend to 256-bits that should get
// correctly legalized. We do this late to allow the canonical form of
// sextload to persist throughout the rest of the DAG combiner -- it wants
// to fold together any extensions it can, and so will fuse a sign_extend
// of an sextload into a sextload targeting a wider value.
SDValue Load;
if (MemSz == 128) {
// Just switch this to a normal load.
assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
"it must be a legal 128-bit vector "
"type!");
Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
} else {
assert(MemSz < 128 &&
"Can't extend a type wider than 128 bits to a 256 bit vector!");
// Do an sext load to a 128-bit vector type. We want to use the same
// number of elements, but elements half as wide. This will end up being
// recursively lowered by this routine, but will succeed as we definitely
// have all the necessary features if we're using AVX1.
EVT HalfEltVT =
EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
Load =
DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
}
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
return DAG.getSExtOrTrunc(Load, dl, RegVT);
}
// All sizes must be a power of two.
assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
"Non-power-of-two elements are not custom lowered!");
// Attempt to load the original value using scalar loads.
// Find the largest scalar type that divides the total loaded size.
MVT SclrLoadTy = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
}
}
// On 32-bit systems, 64-bit integers aren't legal; try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
(64 <= MemSz))
SclrLoadTy = MVT::f64;
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
unsigned loadRegSize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
loadRegSize = 128;
// Represent our vector as a sequence of elements of the largest scalar
// type that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
*DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we "widen" MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegSize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
// We can't shuffle using an illegal type.
assert(TLI.isTypeLegal(WideVecVT) &&
"We only lower types that form legal widened vector types");
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
// Perform a single load.
SDValue ScalarLoad =
DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Ld->getAlignment(), Ld->getMemOperand()->getFlags());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
if (i == 0)
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
else
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1, we can directly emit a VSEXT node.
if (Subtarget.hasSSE41()) {
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
}
// Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
// lanes.
assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
"We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
// Redistribute the loaded elements into the different locations.
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i * SizeRatio] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
DAG.getUNDEF(WideVecVT), ShuffleVec);
// Bitcast to the requested type.
Shuff = DAG.getBitcast(RegVT, Shuff);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
/// Return true if the node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
/// each of which has no other use apart from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
Opc = Op.getOpcode();
if (Opc != ISD::OR && Opc != ISD::AND)
return false;
return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse() &&
Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
Op.getOperand(1).hasOneUse());
}
/// Return true if the node is an ISD::XOR of an X86ISD::SETCC and 1, where the
/// SETCC node has a single use.
static bool isXor1OfSetCC(SDValue Op) {
if (Op.getOpcode() != ISD::XOR)
return false;
if (isOneConstant(Op.getOperand(1)))
return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse();
return false;
}
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
SDValue CC;
bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
// Check for setcc([su]{add,sub,mul}o == 0).
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
isNullConstant(Cond.getOperand(1)) &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
Cond.getOperand(0).getOpcode() == ISD::USUBO ||
Cond.getOperand(0).getOpcode() == ISD::SMULO ||
Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
Inverted = true;
Cond = Cond.getOperand(0);
} else {
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
}
}
#if 0
// FIXME: LowerXALUO doesn't handle these!!
else if (Cond.getOpcode() == X86ISD::ADD ||
Cond.getOpcode() == X86ISD::SUB ||
Cond.getOpcode() == X86ISD::SMUL ||
Cond.getOpcode() == X86ISD::UMUL)
Cond = LowerXALUO(Cond, DAG);
#endif
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
isOneConstant(Cond.getOperand(1)))
Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
// FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
Cond = Cmp;
addTest = false;
} else {
switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
default: break;
case X86::COND_O:
case X86::COND_B:
// These can only come from an arithmetic instruction with overflow,
// e.g. SADDO, UADDO.
Cond = Cond.getOperand(1);
addTest = false;
break;
}
}
}
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
// Keep this in sync with LowerXALUO, otherwise we might create redundant
// instructions that can't be removed afterwards (i.e. X86ISD::ADD and
// X86ISD::INC).
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO:
if (isOneConstant(RHS)) {
X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO:
if (isOneConstant(RHS)) {
X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (Inverted)
X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
SDValue Cmp = Cond.getOperand(0).getOperand(1);
if (CondOpc == ISD::OR) {
// Also, recognize the pattern generated by an FCMP_UNE. We can emit
// two branches instead of an explicit OR instruction with a
// separate test.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp)) {
CC = Cond.getOperand(0).getOperand(0);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = Cond.getOperand(1).getOperand(0);
Cond = Cmp;
addTest = false;
}
} else { // ISD::AND
// Also, recognize the pattern generated by an FCMP_OEQ. We can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp) &&
Op.getNode()->hasOneUse()) {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
}
} else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
// Recognize the pattern xorb (setcc), 1. The xor inverts the condition.
// It should be transformed by the DAG combiner, except when the condition
// is set by an arithmetic-with-overflow node.
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
// For FCMP_OEQ, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
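// A sketch of the emitted code (e.g. for f32 operands, which typically
// lower to ucomiss):
//   ucomiss %xmm1, %xmm0
//   jne FalseBB
//   jp  FalseBB
//   jmp TrueBB   ; may become a fall-through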
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
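// A sketch of the emitted code (again assuming a ucomiss comparison):
//   ucomiss %xmm1, %xmm0
//   jne TrueBB
//   jnp FalseBB
//   jmp TrueBB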
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_UNE.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
Cond = Cmp;
addTest = false;
Dest = FalseBB;
}
}
}
}
if (addTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result is compared against zero. Try to match it to BT.
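// For example (a sketch): (and x, (shl 1, n)) != 0 can be rewritten as a
// BT node plus a COND_B condition, i.e. roughly bt n, x ; jb TrueBB.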
if (Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
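// For example (a sketch): a single 12 KB allocation must not simply do
//   sub $12288, %rsp
// since that could jump past a guard page untouched; the probe routine
// instead touches the stack once per 4 KB page on the way down.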
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
bool Is64Bit = Subtarget.is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Result = DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
// The 64-bit implementation of segmented stacks needs to clobber both r10
// and r11. This makes it impossible to use along with nested parameters.
const Function *F = MF.getFunction();
for (const auto &A : F->args()) {
if (A.hasNestAttr())
report_fatal_error("Cannot use segmented stacks with functions that "
"have nested arguments.");
}
}
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
if (Align) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
Result = SP;
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {Result, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
if (!Subtarget.is64Bit() ||
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
// __va_list_tag:
// gp_offset (0 - 6 * 8)
// fp_offset (48 - 48 + 8 * 16)
// overflow_arg_area (points to parameters passed in memory).
// reg_save_area
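// A C-level sketch of that layout (SysV AMD64 ABI):
//   struct __va_list_tag {
//     unsigned gp_offset;       // byte offset 0
//     unsigned fp_offset;       // byte offset 4
//     void *overflow_arg_area;  // byte offset 8
//     void *reg_save_area;      // byte offset 16
//   };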
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV));
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getMemBasePlusOffset(FIN, 4, DL);
Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV, 4));
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
Store =
DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(
Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()))
// The Win64 ABI uses char* instead of a structure.
return DAG.expandVAArg(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
unsigned Align = Op.getConstantOperandVal(3);
SDLoc dl(Op);
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
// TODO: Implement the AMD64 ABI in its entirety. This simple
// selection mechanism works only for the basic types.
if (ArgVT == MVT::f80) {
llvm_unreachable("va_arg for f80 not yet implemented");
} else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
} else {
llvm_unreachable("Unhandled argument type in LowerVAARG");
}
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget.hasSSE1());
}
// Insert VAARG_64 node into the DAG
// VAARG_64 returns two values: Variable Argument Address, Chain
SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
DAG.getConstant(ArgMode, dl, MVT::i8),
DAG.getConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, InstOps, MVT::i64,
MachinePointerInfo(SV),
/*Align=*/0,
/*Volatile=*/false,
/*ReadMem=*/true,
/*WriteMem=*/true);
Chain = VAARG.getValue(1);
// Load the next argument and return it
return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
// where a va_list is still an i8*.
assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget.isCallingConvWin64(
DAG.getMachineFunction().getFunction()->getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false,
false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, uint64_t ShiftAmt,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
// Bitcast the source vector to the output type, this is mainly necessary for
// vXi8/vXi64 shifts.
if (VT != SrcOp.getSimpleValueType())
SrcOp = DAG.getBitcast(VT, SrcOp);
// Fold this packed shift into its first operand if ShiftAmt is 0.
if (ShiftAmt == 0)
return SrcOp;
// Check for ShiftAmt >= element width
if (ShiftAmt >= ElementType.getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
ShiftAmt = ElementType.getSizeInBits() - 1;
else
return DAG.getConstant(0, dl, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
&& "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
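// For example (a sketch): VSHLI <i32 1, i32 2, undef, i32 4> by 3 folds to
// the build_vector <i32 8, i32 16, undef, i32 32>.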
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
SmallVector<SDValue, 8> Elts;
unsigned NumElts = SrcOp->getNumOperands();
ConstantSDNode *ND;
switch(Opc) {
default: llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRLI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRAI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
}
break;
}
return DAG.getBuildVector(VT, dl, Elts);
}
return DAG.getNode(Opc, dl, VT, SrcOp,
DAG.getConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
// Catch shift-by-constant.
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64 bits of the shift count.
// +=================+============+=======================================+
// | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
// +=================+============+=======================================+
// | i64 | Yes, No | Use ShAmt as lowest elt |
// | i32 | Yes | zero-extend in-reg |
// | (i32 zext(i16)) | Yes | zero-extend in-reg |
// | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
// +=================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
ShAmt = ShAmt.getOperand(0);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// \brief Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
if (isAllOnesConstant(Mask))
return DAG.getTargetConstant(1, dl, MaskVT);
if (X86::isZeroNode(Mask))
return DAG.getTargetConstant(0, dl, MaskVT);
if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
// Mask should be extended
Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
}
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
if (MaskVT == MVT::v64i1) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
// In 32-bit mode a bitcast of i64 is illegal; extend/split it instead.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(0, dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(1, dl, MVT::i32));
Lo = DAG.getBitcast(MVT::v32i1, Lo);
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
// MaskVT requires < 64 bits. Truncate the mask (should succeed in any case),
// and bitcast.
MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits());
return DAG.getBitcast(MaskVT,
DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask));
}
} else {
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
// When MaskVT equals v2i1 or v4i1, the low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}
}
/// \brief Return (and \p Op, \p Mask) for compare instructions or
/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
/// necessary casting or extending for \p Mask when lowering masking intrinsics
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
unsigned OpcodeSelect = ISD::VSELECT;
SDLoc dl(Op);
if (isAllOnesConstant(Mask))
return Op;
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
switch (Op.getOpcode()) {
default: break;
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case X86ISD::VFPCLASS:
case X86ISD::VFPCLASSS:
return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
case X86ISD::CVTPS2PH:
// We can't use ISD::VSELECT here because it is not always "Legal"
// for the destination type. For example, vpmovqb requires only AVX512,
// while a vselect that operates on byte elements requires BWI.
OpcodeSelect = X86ISD::SELECT;
break;
}
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}
/// \brief Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
/// The mask comes in as MVT::i8 and should be transformed
/// to MVT::v1i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
/// "X86select" instead of "vselect". We just can't create the "vselect" node
/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
if (MaskConst->getZExtValue() & 0x1)
return Op;
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}
static int getSEHRegistrationNodeSize(const Function *Fn) {
if (!Fn->hasPersonalityFn())
report_fatal_error(
"querying registration node size for function without personality");
// The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
// WinEHStatePass for the full struct definition.
switch (classifyEHPersonality(Fn->getPersonalityFn())) {
case EHPersonality::MSVC_X86SEH: return 24;
case EHPersonality::MSVC_CXX: return 16;
default: break;
}
report_fatal_error(
"can only recover FP for 32-bit MSVC EH personality functions");
}
/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
/// Here's the math:
/// RegNodeBase = EntryEBP - RegNodeSize
/// ParentFP = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node, and
/// subtracting the offset (negative on x86) takes us back to the parent FP.
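/// A worked example with hypothetical values, say RegNodeSize = 16 (C++ EH)
/// and ParentFrameOffset = -64:
///   RegNodeBase = EntryEBP - 16
///   ParentFP    = RegNodeBase - (-64) = EntryEBP + 48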
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
SDValue EntryEBP) {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc dl;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// It's possible that the parent function no longer has a personality function
// if the exceptional code was optimized away, in which case we just return
// the incoming EBP.
if (!Fn->hasPersonalityFn())
return EntryEBP;
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
// registration, or the .set_setframe offset.
MCSymbol *OffsetSym =
MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
SDValue ParentFrameOffset =
DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
// Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
// prologue to RBP in the parent function.
const X86Subtarget &Subtarget =
static_cast<const X86Subtarget &>(DAG.getSubtarget());
if (Subtarget.is64Bit())
return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// RegNodeBase = EntryEBP - RegNodeSize
// ParentFP = RegNodeBase - ParentFrameOffset
SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
DAG.getConstant(RegNodeSize, dl, PtrVT));
return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
if (!isa<ConstantSDNode>(Rnd))
return false;
unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
};
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
case INTR_TYPE_1OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case INTR_TYPE_4OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
// We always add rounding mode to the Node.
// If the rounding mode is not specified, we add the
// "current direction" mode.
if (Op.getNumOperands() == 4)
RoundingMode =
DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
assert(IntrData->Opc1 == 0 && "Unexpected second opcode!");
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
// - RM Opcode is specified and
// - RM is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, VT, Src1, Src2, Rnd),
Mask, passThru, Subtarget, DAG);
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// There are 2 kinds of intrinsics in this group:
// (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands.
// (2) With both rounding mode and sae - 7 operands.
if (Op.getNumOperands() == 6) {
SDValue Sae = Op.getOperand(5);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
Sae),
Mask, Src0, Subtarget, DAG);
}
assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
SDValue RoundingMode = Op.getOperand(5);
SDValue Sae = Op.getOperand(6);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK:
case INTR_TYPE_2OP_IMM8_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
// TODO: Intrinsics should have fast-math-flags to propagate.
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (6 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Sae = Op.getOperand(6);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2, Src3, Sae),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Imm = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (7 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 7)
Rnd = Op.getOperand(6);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Imm, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(6);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case VPERM_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1),
Mask, PassThru, Subtarget, DAG);
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK: {
MVT VT = Op.getSimpleValueType();
// Src2 is the PassThru
SDValue Src1 = Op.getOperand(1);
// PassThru needs to be the same type as the destination in order
// to pattern match correctly.
SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
PassThru = Src2;
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src2, Src1, Src3),
Mask, PassThru, Subtarget, DAG);
}
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == FMA_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_MASK3)
PassThru = Src3;
else
PassThru = Src1;
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case FMA_OP_SCALAR_MASK:
case FMA_OP_SCALAR_MASK3:
case FMA_OP_SCALAR_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == FMA_OP_SCALAR_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_SCALAR_MASK3)
PassThru = Src3;
else
PassThru = Src1;
SDValue Rnd = Op.getOperand(5);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
Op.getValueType(), Src1, Src2,
Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case TERLOG_OP_MASK:
case TERLOG_OP_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
SDValue Mask = Op.getOperand(5);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = Src1;
// Set PassThru element.
if (IntrData->Type == TERLOG_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Src4),
Mask, PassThru, Subtarget, DAG);
}
case CVTPD2PS:
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
case CVTPD2PS_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
// - RM Opcode is specified and
// - RM is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
DAG.getIntPtrConstant(0, dl)),
Mask, PassThru, Subtarget, DAG);
}
case FPCLASS: {
// FPclass intrinsics with mask
SDValue Src1 = Op.getOperand(1);
MVT VT = Src1.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), FPclassMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask,
DAG.getIntPtrConstant(0, dl));
}
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
// Example of transformation:
// (i8 (int_x86_avx512_mask_pcmpeq_q_128
// (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
// (i8 (bitcast
// (v8i1 (insert_subvector undef,
// (v2i1 (and (PCMPEQM %a, %b),
// (extract_subvector
// (v8i1 (bitcast %mask)), 0))), 0))))
MVT VT = Op.getOperand(1).getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
SDValue CC = Op.getOperand(3);
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC, Rnd);
}
// Default rounding mode.
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC);
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
SDValue Mask = Op.getOperand(4);
SDValue Cmp;
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
}
// Default rounding mode.
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
MVT::i1),
Subtarget, DAG);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask,
DAG.getIntPtrConstant(0, dl));
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
case ISD::SETNE: { // (ZF = 1 or PF = 1)
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
SetCC = getSETCC(X86::COND_A, InvComi, dl, DAG);
break;
}
case ISD::SETGE: // CF = 0
SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
break;
case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
SetCC = getSETCC(X86::COND_AE, InvComi, dl, DAG);
break;
default:
llvm_unreachable("Unexpected illegal condition!");
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
SDValue Sae = Op.getOperand(4);
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
else
FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp,
DAG.getIntPtrConstant(0, dl));
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), Subtarget,
DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
if (isAllOnesConstant(Mask)) // return data as is
return Op.getOperand(1);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
DataToCompress),
Mask, PassThru, Subtarget, DAG);
}
case BROADCASTM: {
SDValue Mask = Op.getOperand(1);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
Mask = DAG.getBitcast(MaskVT, Mask);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
}
case KUNPCK: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);
SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
// Arguments should be swapped.
SDValue Res = DAG.getNode(IntrData->Opc0, dl,
MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
case MASK_BINOP: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2);
return DAG.getBitcast(VT, Res);
}
case FIXUPIMMS:
case FIXUPIMMS_MASKZ:
case FIXUPIMM:
case FIXUPIMM_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Imm = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS) ?
Src1 : getZeroVector(VT, Subtarget, DAG, dl);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (7 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 7)
Rnd = Op.getOperand(6);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Imm, Rnd),
Mask, Passthru, Subtarget, DAG);
else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Imm, Rnd),
Mask, Passthru, Subtarget, DAG);
}
case CONVERT_TO_MASK: {
MVT SrcVT = Op.getOperand(1).getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT,
Op.getOperand(1));
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CvtMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case BRCST_SUBVEC_TO_VEC: {
SDValue Src = Op.getOperand(1);
SDValue Passthru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
EVT resVT = Passthru.getValueType();
SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT,
DAG.getUNDEF(resVT), Src,
DAG.getIntPtrConstant(0, dl));
SDValue immVal;
if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector())
immVal = DAG.getConstant(0x44, dl, MVT::i8);
else
immVal = DAG.getConstant(0, dl, MVT::i8);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
subVec, subVec, immVal),
Mask, Passthru, Subtarget, DAG);
}
case BRCST32x2_TO_VEC: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
assert((VT.getScalarType() == MVT::i32 ||
VT.getScalarType() == MVT::f32) && "Unexpected type!");
// Bitcast Src to a packed 64-bit element type.
MVT ScalarVT = VT.getScalarType() == MVT::i32 ? MVT::i64 : MVT::f64;
MVT BitcastVT = MVT::getVectorVT(ScalarVT, Src.getValueSizeInBits()/64);
Src = DAG.getBitcast(BitcastVT, Src);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
}
default:
break;
}
}
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
// ptest and testp intrinsics. The intrinsics these come from are designed
// to return an integer value, not just an instruction, so lower them to the
// ptest or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
X86::CondCode X86CC =
(IntNo == Intrinsic::x86_avx512_kortestz_w) ? X86::COND_E : X86::COND_B;
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_knot_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kandn_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
// Invert LHS for the not.
LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
DAG.getConstant(1, dl, MVT::v16i1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kxnor_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
// Invert result for the not.
Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
DAG.getConstant(1, dl, MVT::v16i1));
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
case Intrinsic::x86_sse42_pcmpestric128:
case Intrinsic::x86_sse42_pcmpistrio128:
case Intrinsic::x86_sse42_pcmpestrio128:
case Intrinsic::x86_sse42_pcmpistris128:
case Intrinsic::x86_sse42_pcmpestris128:
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpestria128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpistric128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpestric128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpistrio128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpestrio128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpistris128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpestris128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpistriz128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_sse42_pcmpestriz128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_E;
break;
}
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
SDValue SetCC = getSETCC(X86CC, SDValue(PCMP.getNode(), 1), dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
Opcode = X86ISD::PCMPISTRI;
else
Opcode = X86ISD::PCMPESTRI;
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
auto &Context = MF.getMMI().getContext();
MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
Twine(MF.getFunctionNumber()));
return DAG.getNode(X86ISD::Wrapper, dl, VT, DAG.getMCSymbol(S, PtrVT));
}
case Intrinsic::x86_seh_lsda: {
// Compute the symbol for the LSDA. We know it'll get emitted later.
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op1 = Op.getOperand(1);
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
// Generate a simple absolute symbol reference. This intrinsic is only
// supported on 32-bit Windows, which isn't PIC.
SDValue Result = DAG.getMCSymbol(LSDASym, VT);
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
case Intrinsic::x86_seh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.x86.seh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
case Intrinsic::localaddress: {
// Returns one of the stack, base, or frame pointer registers, depending on
// which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
else // This function handles the SP or FP case.
Reg = RegInfo->getPtrSizedFrameRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
}
}
static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = Mask.getValueType();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let ExecutionDepsFix deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
}
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let ExecutionDepsFix deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Mask, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
/// Handles the lowering of builtin intrinsics that return the value
/// of the extended control register.
static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
// The ECX register is used to select the index of the XCR register to
// return.
SDValue Chain =
DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
Chain = SDValue(N1, 0);
// Reads the content of XCR and returns it in registers EDX:EAX.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
if (Subtarget.is64Bit()) {
// Merge the two 32-bit values into a 64-bit one.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
/// Handles the lowering of builtin intrinsics that read performance monitor
/// counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
// The ECX register is used to select the index of the performance counter
// to read.
SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
N->getOperand(2));
SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
// Reads the content of a 64-bit performance counter and returns it in the
// registers EDX:EAX.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
if (Subtarget.is64Bit()) {
// The EAX register is loaded with the low-order 32 bits. The EDX register
// is loaded with the supported high-order bits of the counter.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
/// Handles the lowering of builtin intrinsics that read the time stamp counter
/// (x86_rdtsc and x86_rdtscp). This function is also used to custom lower
/// READCYCLECOUNTER nodes.
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
SDValue LO, HI;
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
SDValue Chain = HI.getValue(1);
if (Opcode == X86ISD::RDTSCP_DAG) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32_TSC_AUX MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
// Explicitly store the content of ECX at the location passed as input
// to the 'rdtscp' intrinsic.
Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
MachinePointerInfo());
}
if (Subtarget.is64Bit()) {
// The EDX register is loaded with the high-order 32 bits of the MSR, and
// the EAX register is loaded with the low-order 32 bits.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 2> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, DL);
}
static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue RegNode = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EH registrations only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca");
EHInfo->EHRegNodeFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue EHGuard = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EHGuard only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehguard expects a static alloca");
EHInfo->EHGuardFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
/// Emit Truncating Store with signed or unsigned saturation.
static SDValue
EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
return SignedSat ?
DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue
EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };
return SignedSat ?
DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
switch (IntNo) {
case llvm::Intrinsic::x86_seh_ehregnode:
return MarkEHRegistrationNode(Op, DAG);
case llvm::Intrinsic::x86_seh_ehguard:
return MarkEHGuard(Op, DAG);
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
case llvm::Intrinsic::x86_flags_write_u64: {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here; we will expand these intrinsics out later
// during ExpandISelPseudos in EmitInstrWithCustomInserter.
return SDValue();
}
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64: {
SDLoc dl(Op);
SDValue Chain = Op->getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue LwpIns =
DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2),
Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG);
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
LwpIns.getValue(1));
}
}
return SDValue();
}
SDLoc dl(Op);
switch(IntrData->Type) {
default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, dl, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, dl, MVT::i32),
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
Ops);
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
case GATHER_AVX2: {
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case GATHER: {
// gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
Chain, Subtarget);
}
case SCATTER: {
// scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
assert((HintVal == 2 || HintVal == 3) &&
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
Subtarget);
}
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
case RDPMC: {
SmallVector<SDValue, 2> Results;
getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Get Extended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
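// Adding all-ones (-1) to the incoming carry operand sets CF exactly when
// that operand is nonzero; this materializes the carry-in flag in EFLAGS
// for the ADC/ADCX/SBB node emitted below.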
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
Op.getOperand(4), GenCF.getValue(1));
SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
Op.getOperand(5), MachinePointerInfo());
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
SDValue Results[] = { SetCC, Store };
return DAG.getMergeValues(Results, dl);
}
case COMPRESS_TO_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = DataToCompress.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
MemIntr->getMemOperand(),
false /* truncating */, true /* compressing */);
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
SDValue Mask = Op.getOperand(4);
SDValue DataToTruncate = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
EVT MemVT = MemIntr->getMemoryVT();
uint16_t TruncationOp = IntrData->Opc0;
switch (TruncationOp) {
case X86ISD::VTRUNC: {
if (isAllOnesConstant(Mask)) // return just a truncate store
return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
MemIntr->getMemOperand(), true /* truncating */);
}
case X86ISD::VTRUNCUS:
case X86ISD::VTRUNCS: {
bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
if (isAllOnesConstant(Mask))
return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand(), DAG);
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
VMask, MemVT, MemIntr->getMemOperand(), DAG);
}
default:
llvm_unreachable("Unsupported truncstore intrinsic");
}
}
case EXPAND_FROM_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = Op.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // Return a regular (unmasked) vector load.
return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
if (X86::isZeroNode(Mask))
return DAG.getUNDEF(VT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
MemIntr->getMemOperand(), ISD::NON_EXTLOAD,
true /* expanding */);
}
}
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo());
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
MachinePointerInfo());
}
SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true);
return getReturnAddressFrameIndex(DAG);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
EVT VT = Op.getValueType();
MFI.setFrameAddressIsTaken(true);
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
// Depth > 0 makes no sense on targets which use Windows unwind codes. It
// is not possible to crawl up the stack without looking at the unwind codes
// simultaneously.
int FrameAddrIndex = FuncInfo->getFAIndex();
if (!FrameAddrIndex) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
FuncInfo->setFAIndex(FrameAddrIndex);
}
return DAG.getFrameIndex(FrameAddrIndex, VT);
}
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const MachineFunction &MF = DAG.getMachineFunction();
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
.Case("ebp", X86::EBP)
.Case("rbp", X86::RBP)
.Default(0);
if (Reg == X86::EBP || Reg == X86::RBP) {
if (!TFI.hasFP(MF))
report_fatal_error("register " + StringRef(RegName) +
" is allocatable: function has no frame pointer");
#ifndef NDEBUG
else {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
"Invalid Frame Register!");
}
#endif
}
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
unsigned X86TargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
}
unsigned X86TargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Funclet personalities don't use selectors (the runtime does the selection).
assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
}
bool X86TargetLowering::needsFixedCatchObjects() const {
return Subtarget.isTargetWin64();
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl (Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
DAG.getIntPtrConstant(RegInfo->getSlotSize(),
dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
DAG.getRegister(StoreAddrReg, PtrVT));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If the subtarget is not 64-bit, we may need the global base reg
// after isel pseudo expansion, i.e., after the CGBR pass has run.
// Therefore, ask for the GlobalBaseReg now, so that the pass
// inserts the code for us in case we need it.
// Otherwise, we would end up referencing a virtual register
// that is never defined!
if (!Subtarget.is64Bit()) {
const X86InstrInfo *TII = Subtarget.getInstrInfo();
(void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
}
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
Op.getOperand(0));
}
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (Subtarget.is64Bit()) {
SDValue OutChains[6];
// Large code-model.
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
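// The 23-byte trampoline emitted below is laid out as follows
// (little-endian byte offsets):
//   0: 49 BB <FPtr:imm64>   movabsq $FPtr, %r11
//  10: 49 BA <Nest:imm64>   movabsq $Nest, %r10
//  20: 49 FF E3             jmpq   *%r11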
// Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, dl, MVT::i64));
OutChains[1] =
DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2),
/* Alignment = */ 2);
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, dl, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, dl, MVT::i64));
OutChains[3] =
DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12),
/* Alignment = */ 2);
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, dl, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20));
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, dl, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
Addr, MachinePointerInfo(TrmpAddr, 22));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
const AttributeList &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
auto &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
}
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
" parameters!");
}
}
break;
}
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
break;
}
SDValue OutChains[4];
SDValue Addr, Disp;
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(10, dl, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
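// The 10-byte 32-bit trampoline is: B8+reg <Nest:imm32> (movl $Nest, %reg)
// followed by E9 <rel32> (jmp FPtr). Disp is the jump displacement relative
// to the end of the trampoline (Trmp + 10), as required by the E9 encoding.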
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] =
DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, dl, MVT::i32));
OutChains[1] =
DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1),
/* Alignment = */ 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, dl, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8),
Addr, MachinePointerInfo(TrmpAddr, 5),
/* Alignment = */ 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, dl, MVT::i32));
OutChains[3] =
DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6),
/* Alignment = */ 1);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
/*
The rounding mode is in bits 11:10 of FPSR, and has the following
settings:
00 Round to nearest
01 Round to -inf
10 Round to +inf
11 Round to 0
FLT_ROUNDS, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we do:
(((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
*/
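// For example, FPSR RC bits 11:10 = 10 (round to +inf):
// ((FPSR & 0x800) >> 11) = 1 and ((FPSR & 0x400) >> 9) = 0,
// so ((1 | 0) + 1) & 3 = 2, which is FLT_ROUNDS "Round to +inf".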
MachineFunction &MF = DAG.getMachineFunction();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(2, StackAlignment, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot
SDValue CWD =
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, DL, MVT::i16)),
DAG.getConstant(11, DL, MVT::i8));
SDValue CWD2 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, DL, MVT::i16)),
DAG.getConstant(9, DL, MVT::i8));
SDValue RetVal =
DAG.getNode(ISD::AND, DL, MVT::i16,
DAG.getNode(ISD::ADD, DL, MVT::i16,
DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, DL, MVT::i16)),
DAG.getConstant(3, DL, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
// Split a unary integer op into 2 half-sized ops.
static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
unsigned SizeInBits = VT.getSizeInBits();
// Extract the Lo/Hi vectors
SDLoc dl(Op);
SDValue Src = Op.getOperand(0);
SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
DAG.getNode(Op.getOpcode(), dl, NewVT, Hi));
}
// Decompose 256-bit ops into smaller 128-bit ops.
static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return LowerVectorIntUnary(Op, DAG);
}
// Decompose 512-bit ops into smaller 256-bit ops.
static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is512BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 512-bit vector integer operation");
return LowerVectorIntUnary(Op, DAG);
}
/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction.
//
// i8/i16 vectors are implemented using the dword LZCNT vector instruction
// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
// split the vector, perform the operation on its Lo and Hi parts, and
// concatenate the results.
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::CTLZ);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
"Unsupported element type");
// Split the vector; its Lo and Hi parts will be handled in the next iteration.
if (16 < NumElems)
return LowerVectorIntUnary(Op, DAG);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
"Unsupported value type for operation");
// Use the natively supported vector instruction vplzcntd.
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}
// Lower CTLZ using a PSHUFB lookup table implementation.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
int NumElts = VT.getVectorNumElements();
int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);
// Per-nibble leading zero PSHUFB lookup table.
const int LUT[16] = {/* 0 */ 4, /* 1 */ 3, /* 2 */ 2, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 1, /* 6 */ 1, /* 7 */ 1,
/* 8 */ 0, /* 9 */ 0, /* a */ 0, /* b */ 0,
/* c */ 0, /* d */ 0, /* e */ 0, /* f */ 0};
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumBytes; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);
// Begin by bitcasting the input to a byte vector, then split those bytes
// into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
// If the hi input nibble is zero then we add both results together, otherwise
// we just take the hi result (by masking the lo result to zero before the
// add).
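// For example, for the byte 0x1C the hi nibble is 1, so HiZ is false and
// the result is LUT[1] = 3; for 0x05 the hi nibble is 0, so the result is
// LUT[0] + LUT[5] = 4 + 1 = 5 leading zeros.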
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
SDValue NibbleMask = DAG.getConstant(0xF, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
SDValue Lo = DAG.getNode(ISD::AND, DL, CurrVT, Op0, NibbleMask);
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ);
Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ);
SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);
// Merge the result from vXi8 back to VT, working on the lo/hi halves
// of the current vector width in the same way we did for the nibbles.
// If the upper half of the input element is zero then add the halves'
// leading zero counts together, otherwise just use the upper half's.
// Double the width of the result until we are at target width.
while (CurrVT != VT) {
int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
int CurrNumElts = CurrVT.getVectorNumElements();
MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2);
MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2);
SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);
// Check if the upper half of the input element is zero.
SDValue HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0),
DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
HiZ = DAG.getBitcast(NextVT, HiZ);
// Move the upper/lower halves to the lower bits as we'll be extending to
// NextVT. Mask the lower result to zero if HiZ is true and add the results
// together.
SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1);
Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1);
CurrVT = NextVT;
}
return Res;
}
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (Subtarget.hasCDI())
return LowerVectorCTLZ_AVX512CDI(Op, DAG);
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
if (VT.isVector())
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is no i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
// Issue a bsr (scan bits in reverse) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
if (Opc == ISD::CTLZ) {
// If src is zero (i.e. bsr sets ZF), returns NumBits.
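// Select 2*NumBits-1 here so that the XOR with NumBits-1 below maps the
// all-zero-input case to NumBits (e.g. for i32: 63 ^ 31 == 32).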
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
}
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
DAG.getConstant(NumBits - 1, dl, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
return Op;
}
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
SDLoc dl(Op);
if (VT.isVector()) {
SDValue N0 = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, dl, VT);
// lsb(x) = (x & -x)
SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
// cttz_undef(x) = (width - 1) - ctlz(lsb)
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
DAG.getNode(ISD::CTLZ, dl, VT, LSB));
}
// cttz(x) = ctpop(lsb - 1)
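// For example, x = 0b01100: lsb = 0b00100, and
// ctpop(lsb - 1) = ctpop(0b00011) = 2 == cttz(x).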
SDValue One = DAG.getConstant(1, dl, VT);
return DAG.getNode(ISD::CTPOP, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, LSB, One));
}
assert(Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits, dl, VT),
DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
/// Break a 256-bit integer operation into two new 128-bit ones and then
/// concatenate the result back.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
/// Break a 512-bit integer operation into two new 256-bit ones and then
/// concatenate the result back.
static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is512BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract256BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract256BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract256BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
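// For i1 vectors, both add and sub are addition mod 2, i.e. XOR.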
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntUnary(Op, DAG);
}
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
if (Subtarget.hasInt256()) {
// For 512-bit vectors, split into 256-bit vectors to allow the
// sign-extension to occur.
if (VT == MVT::v64i8)
return Lower512IntArith(Op, DAG);
// For 256-bit vectors, split into 128-bit vectors to allow the
// sign-extension to occur. We don't need this on AVX512BW as we can
// safely sign-extend to v32i16.
if (VT == MVT::v32i8 && !Subtarget.hasBWI())
return Lower256IntArith(Op, DAG);
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
}
assert(VT == MVT::v16i8 &&
"Pre-AVX2 support only supports v16i8 multiplication");
MVT ExVT = MVT::v8i16;
// Extract the lo parts and sign extend to i16
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT);
BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
ALo = DAG.getBitcast(ExVT, ALo);
BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
}
// Extract the hi parts and sign extend to i16
SDValue AHi, BHi;
if (Subtarget.hasSSE41()) {
const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT);
BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getBitcast(ExVT, AHi);
BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
}
// Multiply, mask the lo/hi results to their lower 8 bits, and pack
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
"Should not custom lower when pmuldq is available!");
// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
// Multiply the even parts.
SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
Evens = DAG.getBitcast(VT, Evens);
Odds = DAG.getBitcast(VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
static const int ShufMask[] = { 0, 4, 2, 6 };
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
// 32-bit vector types used for PMULDQ/PMULUDQ.
MVT MulVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
// PMULDQ returns the 64-bit result of the signed multiplication of the lower
// 32 bits. We can lower with this if the sign bits stretch that far.
if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(A) > 32 &&
DAG.ComputeNumSignBits(B) > 32) {
return DAG.getNode(X86ISD::PMULDQ, dl, VT, DAG.getBitcast(MulVT, A),
DAG.getBitcast(MulVT, B));
}
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
// AloBlo = pmuludq(a, b);
// AloBhi = pmuludq(a, Bhi);
// AhiBlo = pmuludq(Ahi, b);
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
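// This is schoolbook multiplication on 32-bit digits: with
// A = Ahi*2^32 + Alo and B = Bhi*2^32 + Blo,
// A*B mod 2^64 = AloBlo + ((AloBhi + AhiBlo) << 32),
// since the AhiBhi term is shifted entirely out of the low 64 bits.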
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask);
bool BLoIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask);
APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
// Bit cast to 32-bit vectors for MULUDQ.
SDValue Alo = DAG.getBitcast(MulVT, A);
SDValue Blo = DAG.getBitcast(MulVT, B);
SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
if (!ALoIsZero && !BLoIsZero)
AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo);
SDValue AloBhi = Zero;
if (!ALoIsZero && !BHiIsZero) {
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
Bhi = DAG.getBitcast(MulVT, Bhi);
AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi);
}
SDValue AhiBlo = Zero;
if (!AHiIsZero && !BLoIsZero) {
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
Ahi = DAG.getBitcast(MulVT, Ahi);
AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo);
}
SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);
return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
unsigned Opcode = Op.getOpcode();
unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
unsigned ExSSE41 = (ISD::MULHU == Opcode ? X86ISD::VZEXT : X86ISD::VSEXT);
// AVX2 implementations - extend xmm subvectors to ymm.
if (Subtarget.hasInt256()) {
SDValue Lo = DAG.getIntPtrConstant(0, dl);
SDValue Hi = DAG.getIntPtrConstant(VT.getVectorNumElements() / 2, dl);
if (VT == MVT::v32i8) {
SDValue ALo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, A, Lo);
SDValue BLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, B, Lo);
SDValue AHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, A, Hi);
SDValue BHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, B, Hi);
ALo = DAG.getNode(ExSSE41, dl, MVT::v16i16, ALo);
BLo = DAG.getNode(ExSSE41, dl, MVT::v16i16, BLo);
AHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, AHi);
BHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, BHi);
Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo),
DAG.getConstant(8, dl, MVT::v16i16));
Hi = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
DAG.getNode(ISD::MUL, dl, MVT::v16i16, AHi, BHi),
DAG.getConstant(8, dl, MVT::v16i16));
// The ymm variant of PACKUS treats the 128-bit lanes separately, so before
// using PACKUS we need to permute the inputs to the correct lo/hi xmm lane.
const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
16, 17, 18, 19, 20, 21, 22, 23};
const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
24, 25, 26, 27, 28, 29, 30, 31};
return DAG.getNode(X86ISD::PACKUS, dl, VT,
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, LoMask),
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
}
SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG);
SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
DAG.getConstant(8, dl, MVT::v16i16));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Lo);
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Hi);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
assert(VT == MVT::v16i8 &&
"Pre-AVX2 support only supports v16i8 multiplication");
MVT ExVT = MVT::v8i16;
// Extract the lo parts and zero/sign extend to i16.
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG);
BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
ALo = DAG.getBitcast(ExVT, ALo);
BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ExShift, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ExShift, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
}
// Extract the hi parts and zero/sign extend to i16.
SDValue AHi, BHi;
if (Subtarget.hasSSE41()) {
const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG);
BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getBitcast(ExVT, AHi);
BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ExShift, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ExShift, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
}
// Multiply, logically shift the upper 8 bits of the lo/hi results down to
// the lower 8 bits, and pack back to v16i8.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::SRL, dl, ExVT, RLo, DAG.getConstant(8, dl, ExVT));
RHi = DAG.getNode(ISD::SRL, dl, ExVT, RHi, DAG.getConstant(8, dl, ExVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
default: llvm_unreachable("Unexpected request for libcall!");
case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break;
case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
}
SDLoc dl(Op);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
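// Win64 has no native 128-bit integer arguments, so pass each i128 operand
// indirectly: spill it to a 16-byte-aligned stack slot and hand the libcall
// a pointer to that slot. The 128-bit result comes back in XMM0 (as v2i64)
// and is bitcast to VT at the end.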
for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
EVT ArgVT = Op->getOperand(i).getValueType();
assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
"Unexpected argument type for lowering");
SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
Entry.Node = StackPtr;
InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
MachinePointerInfo(), /* Alignment = */ 16);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy, 0);
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(
getLibcallCallingConv(LC),
static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args))
.setInRegister()
.setSExtResult(isSigned)
.setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return DAG.getBitcast(VT, CallInfo.first);
}
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
MVT VT = Op0.getSimpleValueType();
SDLoc dl(Op);
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
unsigned Opcode = Op.getOpcode();
unsigned NumElems = VT.getVectorNumElements();
MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
SDValue Ops[] = {
DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
};
return DAG.getMergeValues(Ops, dl);
}
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
(VT == MVT::v8i32 && Subtarget.hasInt256()));
// PMULxD operations multiply each even value (starting at 0) of LHS with
// the related value of RHS and produce a widened result.
// E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
//
// In other words, to have all the results, we need to perform two PMULxD:
// 1. one with the even values.
// 2. one with the odd values.
// To achieve #2, we need to place the odd values at an even position.
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
makeArrayRef(&Mask[0], VT.getVectorNumElements()));
// <e|f|g|h> => <f|undef|h|undef>
SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
makeArrayRef(&Mask[0], VT.getVectorNumElements()));
// Emit two multiplies, one for the lower 2 ints and one for the higher 2
// ints.
MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
unsigned Opcode =
(!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
// => <2 x i64> <bf|dh>
SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
SDValue Highs, Lows;
if (VT == MVT::v8i32) {
const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
} else {
const int HighMask[] = {1, 5, 3, 7};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
const int LowMask[] = {0, 4, 2, 6};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
}
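// Worked example (illustrative only, v4i32): bitcast to v4i32, Mul1 is
// <lo(ae), hi(ae), lo(cg), hi(cg)> and Mul2 is <lo(bf), hi(bf), lo(dh),
// hi(dh)>, so HighMask {1, 5, 3, 7} yields <hi(ae), hi(bf), hi(cg), hi(dh)>
// and LowMask {0, 4, 2, 6} yields <lo(ae), lo(bf), lo(cg), lo(dh)>.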
// If we have a signed multiply but no PMULDQ, fix up the high parts of
// an unsigned multiply.
if (IsSigned && !Subtarget.hasSSE41()) {
SDValue ShAmt = DAG.getConstant(
31, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
// The first result of MUL_LOHI is actually the low value, followed by the
// high value.
SDValue Ops[] = {Lows, Highs};
return DAG.getMergeValues(Ops, dl);
}
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget.
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256());
bool AShift = LShift && (Subtarget.hasAVX512() ||
(VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget.
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
return false;
// vXi16 supported only on AVX-512, BWI
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
if (Subtarget.hasAVX512())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
return (Opcode == ISD::SRA) ? AShift : LShift;
}
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
SDValue Ex = DAG.getBitcast(ExVT, R);
// ashr(R, 63) === cmp_slt(R, 0)
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
return DAG.getNode(X86ISD::PCMPGT, dl, VT,
getZeroVector(VT, Subtarget, DAG, dl), R);
}
if (ShiftAmt >= 32) {
// Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
SDValue Upper =
getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt - 32, DAG);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
// SRA upper i32, SHL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
Lower = DAG.getBitcast(ExVT, Lower);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{8, 1, 10, 3, 12, 5, 14, 7});
}
return DAG.getBitcast(VT, Ex);
};
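// Illustrative trace (not part of the lowering): for v2i64 with
// ShiftAmt >= 32, Ex views each i64 lane as an <lo, hi> i32 pair; Upper
// holds the per-i32 sign splat and Lower holds hi >> (ShiftAmt - 32), so
// mask {5, 1, 7, 3} rebuilds lane 0 as <Lower[1], Upper[1]>, i.e. the
// 64-bit ashr result.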
// Optimize shl/srl/sra with constant shift amount.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
uint64_t ShiftAmt = ShiftConst->getZExtValue();
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
(Subtarget.hasInt256() && VT == MVT::v4i64)) &&
Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 ||
(Subtarget.hasInt256() && VT == MVT::v32i8) ||
VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Simple i8 add case
if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
return DAG.getNode(ISD::ADD, dl, VT, R, R);
// ashr(R, 7) === cmp_slt(R, 0)
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
if (VT.is512BitVector()) {
assert(VT == MVT::v64i8 && "Unexpected element type!");
SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// XOP can shift v16i8 directly instead of as shift v8i16 + mask.
if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
// ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
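// Worked i8 example (illustrative only): R = 0xF0 (-16), ShiftAmt = 4:
// lshr gives 0x0F, Mask = 128 >> 4 = 0x08, xor gives 0x07, and
// 0x07 - 0x08 = 0xFF = -1, matching ashr(-16, 4).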
llvm_unreachable("Unknown shift opcode.");
}
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
// TODO: Replace constant extraction with getTargetConstantBitsFromNode.
if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
(VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
(Subtarget.hasAVX512() && VT == MVT::v8i64))) {
// AVX1 targets may be extracting a 128-bit vector from a 256-bit constant.
unsigned SubVectorScale = 1;
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SubVectorScale =
Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
Amt = Amt.getOperand(0);
}
// Peek through any splat that was introduced for i64 shift vectorization.
int SplatIndex = -1;
if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
if (SVN->isSplat()) {
SplatIndex = SVN->getSplatIndex();
Amt = Amt.getOperand(0);
assert(SplatIndex < (int)VT.getVectorNumElements() &&
"Splat shuffle referencing second operand");
}
if (Amt.getOpcode() != ISD::BITCAST ||
Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
(SubVectorScale * VT.getVectorNumElements());
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
for (unsigned i = 0; i != Ratio; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
if (!C)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
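// Worked example (illustrative only): for VT = v2i64 with Amt bitcast from
// the v4i32 build_vector <5, 0, 5, 0>, Ratio = 2 and RatioInLog2 = 1, so
// ShiftAmt = 5 | (0 << 32) = 5; the second <5, 0> group matches below and
// the splat shift amount 5 is accepted.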
// Check remaining shift amounts (if not a splat).
if (SplatIndex < 0) {
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
uint64_t ShAmt = 0;
for (unsigned j = 0; j != Ratio; ++j) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
if (!C)
return SDValue();
// 6 == Log2(64)
ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
if (ShAmt != ShiftAmt)
return SDValue();
}
}
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
if (Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
}
return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86OpcI = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
unsigned X86OpcV = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHL :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) {
SDValue BaseShAmt;
MVT EltVT = VT.getVectorElementType();
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
// Check if this build_vector node is doing a splat.
// If so, then set BaseShAmt equal to the splat value.
BaseShAmt = BV->getSplatValue();
if (BaseShAmt && BaseShAmt.isUndef())
BaseShAmt = SDValue();
} else {
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
Amt = Amt.getOperand(0);
ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt);
if (SVN && SVN->isSplat()) {
unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) &&
"Unexpected shuffle index found!");
BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
if (!BaseShAmt)
// Avoid introducing an extract element from a shuffle.
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec,
DAG.getIntPtrConstant(SplatIdx, dl));
}
}
if (BaseShAmt.getNode()) {
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget.is64Bit() && VT == MVT::v2i64 &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
Vals[i] = Amt.getOperand(i);
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
for (unsigned j = 0; j != Ratio; ++j)
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
}
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
return Op;
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
if (Op.getOpcode() == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// v2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
// i64 vector arithmetic shift can be emulated with the transform:
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Op.getOpcode() == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
R = DAG.getNode(ISD::XOR, dl, VT, R, M);
R = DAG.getNode(ISD::SUB, dl, VT, R, M);
return R;
}
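// Worked example (illustrative only): R = -16 = 0xFFFF...F0, Amt = 4:
// M = 0x8000... >> 4 = 0x0800..., lshr(R, 4) = 0x0FFF...FF, xor gives
// 0x07FF...FF, and subtracting M yields 0xFFFF...FF = -1 = ashr(-16, 4).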
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
if (ConstantAmt && Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16))) {
SmallVector<SDValue, 8> Elts;
MVT SVT = VT.getVectorElementType();
unsigned SVTBits = SVT.getSizeInBits();
APInt One(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = Amt->getOperand(i);
if (Op->isUndef()) {
Elts.push_back(Op);
continue;
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
continue;
}
Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
}
SDValue BV = DAG.getBuildVector(VT, dl, Elts);
return DAG.getNode(ISD::MUL, dl, VT, R, BV);
}
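// Worked example (illustrative only): (v4i32 shl X, <1, 2, 3, 4>) becomes
// (v4i32 mul X, <2, 4, 8, 16>); undef or out-of-range amounts simply
// produce undef multiplier lanes.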
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
Op = DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getConstant(0x3f800000U, dl, VT));
Op = DAG.getBitcast(MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
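// Worked example (illustrative only): for a lane with Amt = 5,
// (5 << 23) + 0x3f800000 = 0x42000000 = 32.0f, and FP_TO_SINT recovers
// 32 = 1 << 5, so the final MUL implements the shift.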
// If possible, lower this shift as a sequence of two shifts by
// constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it.
// Example:
// (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
//
// Could be rewritten as:
// (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
//
// The advantage is that the two shifts from the example would be
// lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing
// the vector shift into four scalar shifts plus four pairs of vector
// insert/extract.
if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) {
unsigned TargetOpcode = X86ISD::MOVSS;
bool CanBeSimplified;
// The splat value for the first packed shift (the 'X' from the example).
SDValue Amt1 = Amt->getOperand(0);
// The splat value for the second packed shift (the 'Y' from the example).
SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2);
// See if it is possible to replace this node with a sequence of
// two shifts followed by a MOVSS/MOVSD/PBLEND.
if (VT == MVT::v4i32) {
// Check if it is legal to use a MOVSS.
CanBeSimplified = Amt2 == Amt->getOperand(2) &&
Amt2 == Amt->getOperand(3);
if (!CanBeSimplified) {
// Otherwise, check if we can still simplify this node using a MOVSD.
CanBeSimplified = Amt1 == Amt->getOperand(1) &&
Amt->getOperand(2) == Amt->getOperand(3);
TargetOpcode = X86ISD::MOVSD;
Amt2 = Amt->getOperand(2);
}
} else {
// Do similar checks for the case where the machine value type
// is MVT::v8i16.
CanBeSimplified = Amt1 == Amt->getOperand(1);
for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
CanBeSimplified = Amt2 == Amt->getOperand(i);
if (!CanBeSimplified) {
TargetOpcode = X86ISD::MOVSD;
CanBeSimplified = true;
Amt2 = Amt->getOperand(4);
for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
CanBeSimplified = Amt1 == Amt->getOperand(i);
for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
CanBeSimplified = Amt2 == Amt->getOperand(j);
}
}
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND.
MVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
SDValue Splat2 =
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1);
SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2);
if (TargetOpcode == X86ISD::MOVSD)
return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
BitCast2, {0, 1, 6, 7}));
return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
BitCast2, {0, 5, 6, 7}));
}
}
// v4i32 Non-Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
unsigned Opc = Op.getOpcode();
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
// ISD::SHL is handled above but we include it here for completeness.
switch (Opc) {
default:
llvm_unreachable("Unknown target vector shift node");
case ISD::SHL:
Opc = X86ISD::VSHL;
break;
case ISD::SRL:
Opc = X86ISD::VSRL;
break;
case ISD::SRA:
Opc = X86ISD::VSRA;
break;
}
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. These shuffle masks
// optimally zero-extend each lane on SSE2/SSE41/AVX targets.
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
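// E.g. Amt0 = <Amt[0], 0, undef, undef>, whose lower i64 is
// zero_extend(Amt[0]), exactly what VSHL/VSRL/VSRA consume.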
}
SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
// types, but without AVX512 the extra overheads to get from vXi8 to vXi32
// make the existing SSE solution better.
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
// On AVX512BW targets we make use of the fact that VSELECT lowers
// to a masked blend which selects bytes based just on the sign bit
// extracted to a mask.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
} else if (Subtarget.hasSSE41()) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
Amt = DAG.getBitcast(VT, Amt);
if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
SDValue M =
DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
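// Illustrative trace (not part of the lowering): for a lane with shift
// amount 5 (0b101), a << 5 puts bit 2 in the sign bit, so the shift-by-4
// is selected; the first a += a exposes bit 1 (clear, keep R); the second
// exposes bit 0 (set, take the shift-by-1), for a total shift of 4 + 1 = 5.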
if (Op->getOpcode() == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of an i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(4, dl, ExtVT));
SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(4, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(2, dl, ExtVT));
MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(2, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(1, dl, ExtVT));
MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(1, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
// byte, meaning that we can safely pack with PACKUSWB.
RLo =
DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
RHi =
DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Z, R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
unsigned ShiftOpcode = Op->getOpcode();
// If we have a constant shift amount, the non-SSE41 path is best as
// avoiding bitcasts makes it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
!ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
if (UseSSE41) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
Sel = DAG.getBitcast(ExtVT, Sel);
return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1));
}
// On pre-SSE41 targets we splat the sign bit - a negative value will
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
return DAG.getSelect(dl, VT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 12;
if (UseSSE41) {
// On SSE41 targets we need to replicate the shift mask in both
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
} else {
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
}
// r = VSELECT(r, shift(r, 8), a);
SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(Amt, M, R);
return R;
}
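// Illustrative trace (not part of the lowering): for a lane with shift
// amount 9 (0b1001), a << 12 puts bit 3 in the sign bit, selecting the
// shift-by-8; the doublings then expose bits 2 and 1 (clear) and bit 0
// (set), adding the shift-by-1 for a total of 8 + 1 = 9.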
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector())
return Lower256IntArith(Op, DAG);
return SDValue();
}
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (Subtarget.hasAVX512()) {
// Attempt to rotate by immediate.
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
return EltBits[0] == V;
})) {
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
return DAG.getNode(Op, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
}
// Else, fall-back on VPROLV/VPRORV.
return Op;
}
assert(VT.isVector() && "Custom lowering only for vector rotates!");
assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right.
// Split 256-bit integers.
if (VT.is256BitVector())
return Lower256IntArith(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
assert(RotateAmt < EltSizeInBits && "Rotation out of range");
return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
}
// Use general rotate by variable (per-element).
return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
// looks for this combo and may remove the "setcc" instruction if the "setcc"
// has only one use.
SDNode *N = Op.getNode();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
X86::CondCode Cond;
SDLoc DL(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
case ISD::UADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_B;
break;
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
case ISD::USUBO:
BaseOp = X86ISD::SUB;
Cond = X86::COND_B;
break;
case ISD::SMULO:
BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
if (N->getValueType(0) == MVT::i8) {
BaseOp = X86ISD::UMUL8;
Cond = X86::COND_O;
break;
}
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
else if (OpWidth == 128)
return Subtarget.hasCmpxchg16b();
else
return false;
}
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return needsCmpXchgNb(SI->getValueOperand()->getType());
}
// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
auto PTy = cast<PointerType>(LI->getPointerOperandType());
return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Xchg:
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
}
}
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
// harmful as it introduces an mfence.
if (MemType->getPrimitiveSizeInBits() > NativeWidth)
return nullptr;
auto Builder = IRBuilder<>(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
auto Ptr = AI->getPointerOperand();
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
// is required:
// Thread 0:
// x.store(1, relaxed);
// r1 = y.fetch_add(0, release);
// Thread 1:
// y.fetch_add(42, acquire);
// r2 = x.load(relaxed);
// r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
// lowered to just a load without a fence. An mfence flushes the store buffer,
// making the optimization clearly correct.
// FIXME: it is required if isReleaseOrStronger(Order) but it is not clear
// otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
if (SSID == SyncScope::SingleThread)
// FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
if (!Subtarget.hasMFence())
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
// enough that we do not bother.
return nullptr;
Function *MFence =
llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
Builder.CreateCall(MFence, {});
// Finally we can emit the atomic load.
LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
AI->getType()->getPrimitiveSizeInBits());
Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
return Loaded;
}
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
FenceSSID == SyncScope::System) {
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, dl, MVT::i8), // Scale
DAG.getRegister(0, MVT::i32), // Index
DAG.getTargetConstant(0, dl, MVT::i32), // Disp
DAG.getRegister(0, MVT::i32), // Segment.
Zero,
Chain
};
SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT T = Op.getSimpleValueType();
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
switch(T.SimpleTy) {
default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
assert(Subtarget.is64Bit() && "Node not type legal!");
Reg = X86::RAX; size = 8;
break;
}
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, DL, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
return SDValue();
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
SDLoc dl(Op);
unsigned NumElts;
MVT SVT;
if (SrcVT.isVector()) {
NumElts = SrcVT.getVectorNumElements();
SVT = SrcVT.getVectorElementType();
// Widen the input vector in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
DAG.getIntPtrConstant(i, dl)));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(0, dl)));
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(1, dl)));
NumElts = 2;
SVT = MVT::i32;
}
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
DAG.getIntPtrConstant(0, dl));
}
assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
Subtarget.hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
if (DstVT==MVT::i64 && SrcVT.isVector())
return Op;
// MMX <=> MMX conversions are Legal.
if (SrcVT.isVector() && DstVT.isVector())
return Op;
// All other conversions need to be expanded.
return SDValue();
}
/// Compute the horizontal sum of bytes in V for the elements of VT.
///
/// Requires V to be a byte vector and VT to be an integer vector type with
/// wider elements than V's type. The width of the elements of VT determines
/// how many bytes of V are summed horizontally to produce each element of the
/// result.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(V);
MVT ByteVecVT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
"Expected value to have byte element type.");
assert(EltVT != MVT::i8 &&
"Horizontal byte sum only makes sense for wider elements!");
unsigned VecSize = VT.getSizeInBits();
assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
// The PSADBW instruction horizontally adds all bytes and leaves the result in
// i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
}
if (EltVT == MVT::i32) {
// We unpack the low half and high half into i32s interleaved with zeros so
// that we can use PSADBW to horizontally sum them. The most useful part of
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which, when concatenated, are the 4 population counts. We
// can then use PACKUSWB to shrink and concatenate them into a v4i32 again.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
SDValue V32 = DAG.getBitcast(VT, V);
SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, High), Zeros);
// Merge them together.
MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
DAG.getBitcast(ShortVecVT, Low),
DAG.getBitcast(ShortVecVT, High));
return DAG.getBitcast(VT, V);
}
// The only element type left is i16.
assert(EltVT == MVT::i16 && "Unknown how to handle type");
// To obtain pop count for each i16 element starting from the pop count for
// i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s
// right by 8. It is important to shift as i16s as i8 vector shift isn't
// directly supported.
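// Worked example (illustrative only): for the input i16 0x0F0F the byte
// counts in V are 0x0404; shl 8 gives 0x0400, the i8 add gives 0x0804
// (high byte 4 + 4 = 8), and srl 8 yields 0x0008 = popcount(0x0F0F).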
SDValue ShifterV = DAG.getConstant(8, DL, VT);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
DAG.getBitcast(ByteVecVT, V));
return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned VecSize = VT.getSizeInBits();
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
//
// The general idea is that every lower byte nibble in the input vector is an
// index into an in-register pre-computed pop count table. We then split up the
// input vector into two new ones: (1) a vector with only the shifted-right
// higher nibbles for each byte and (2) a vector with the lower nibbles (and
// masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is an
// i8 vector where each element contains the pop count for its input byte.
//
// To obtain the pop count for elements != i8, we follow up with the same
// approach and use additional tricks as described below.
//
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
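// Worked example (illustrative only): input byte 0xE5 = 0b11100101 splits
// into nibbles 0xE (LUT -> 3) and 0x5 (LUT -> 2); 3 + 2 = 5 = popcount(0xE5).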
int NumByteElts = VecSize / 8;
MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
SDValue In = DAG.getBitcast(ByteVecVT, Op);
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumByteElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(ByteVecVT, DL, LUTVec);
SDValue M0F = DAG.getConstant(0x0F, DL, ByteVecVT);
// High nibbles
SDValue FourV = DAG.getConstant(4, DL, ByteVecVT);
SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
// Low nibbles
SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
// The input vector is used as the shuffle mask that indexes elements into the
// LUT. After counting low and high nibbles, add the vector to obtain the
// final pop count per i8 element.
SDValue HighPopCnt =
DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
SDValue LowPopCnt =
DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
if (EltVT == MVT::i8)
return PopCnt;
return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
}
static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is128BitVector() &&
"Only 128-bit vector bitmath lowering supported.");
int VecSize = VT.getSizeInBits();
MVT EltVT = VT.getVectorElementType();
int Len = EltVT.getSizeInBits();
// This is the vectorized version of the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// with a minor tweak to use a series of adds + shifts instead of vector
// multiplications. Implemented for all integer vector types. We only use
// this when we don't have SSSE3 which allows a LUT-based lowering that is
// much faster, even faster than using native popcnt instructions.
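// Worked example (illustrative only) for the byte 0xFF: step 1 gives
// 0xFF - 0x55 = 0xAA (each 2-bit field holds 2); step 2 gives
// 0x22 + 0x22 = 0x44 (each nibble holds 4); step 3 gives
// (0x44 + 0x04) & 0x0F = 8 = popcount(0xFF).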
auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
MVT VT = V.getSimpleValueType();
SDValue ShifterV = DAG.getConstant(Shifter, DL, VT);
return DAG.getNode(OpCode, DL, VT, V, ShifterV);
};
auto GetMask = [&](SDValue V, APInt Mask) {
MVT VT = V.getSimpleValueType();
SDValue MaskV = DAG.getConstant(Mask, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, V, MaskV);
};
// We don't want to incur the implicit masks required to SRL vNi8 vectors on
// x86, so set the SRL type to have elements at least i16 wide. This is
// correct because all of our SRLs are followed immediately by a mask anyway
// that handles any bits that sneak into the high bits of the byte elements.
MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
SDValue V = Op;
// v = v - ((v >> 1) & 0x55555555...)
SDValue Srl =
DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
V = DAG.getNode(ISD::SUB, DL, VT, V, And);
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
// v = (v + (v >> 4)) & 0x0F0F0F0F...
Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
// At this point, V contains the byte-wise population count, and we are
// merely doing a horizontal sum if necessary to get the wider element
// counts.
if (EltVT == MVT::i8)
return V;
return LowerHorizontalByteSum(
DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
DAG);
}
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
"Unknown CTPOP type to handle");
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
// TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
if (Subtarget.hasVPOPCNTDQ()) {
if (VT == MVT::v8i16) {
Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0);
Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op);
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
}
if (VT == MVT::v16i8 || VT == MVT::v16i16) {
Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0);
Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op);
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
}
}
if (!Subtarget.hasSSSE3()) {
// We can't use the fast LUT approach, so fall back on vectorized bitmath.
assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
}
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
// For scalars, it's still beneficial to transfer to/from the SIMD unit to
// perform the BITREVERSE.
if (!VT.isVector()) {
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
int NumElts = VT.getVectorNumElements();
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector())
return Lower256IntUnary(Op, DAG);
assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
// VPPERM reverses the bits of a byte with the permute Op (2 << 5), and we
// perform the BSWAP in the shuffle.
// It's best to shuffle using the second operand as this will implicitly allow
// memory folding for multiple vectors.
SmallVector<SDValue, 16> MaskElts;
for (int i = 0; i != NumElts; ++i) {
for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
int PermuteByte = SourceByte | (2 << 5);
MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
}
}
SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
SDValue Res = DAG.getBitcast(MVT::v16i8, In);
Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
Res, Mask);
return DAG.getBitcast(VT, Res);
}
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (Subtarget.hasXOP())
return LowerBITREVERSE_XOP(Op, DAG);
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarType() == MVT::i8 &&
"Only byte vector BITREVERSE supported");
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
// 0-15 value (moved to the other nibble).
SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));
const int LoLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x80, /* 2 */ 0x40, /* 3 */ 0xC0,
/* 4 */ 0x20, /* 5 */ 0xA0, /* 6 */ 0x60, /* 7 */ 0xE0,
/* 8 */ 0x10, /* 9 */ 0x90, /* a */ 0x50, /* b */ 0xD0,
/* c */ 0x30, /* d */ 0xB0, /* e */ 0x70, /* f */ 0xF0};
const int HiLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x08, /* 2 */ 0x04, /* 3 */ 0x0C,
/* 4 */ 0x02, /* 5 */ 0x0A, /* 6 */ 0x06, /* 7 */ 0x0E,
/* 8 */ 0x01, /* 9 */ 0x09, /* a */ 0x05, /* b */ 0x0D,
/* c */ 0x03, /* d */ 0x0B, /* e */ 0x07, /* f */ 0x0F};
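// Worked example (illustrative only): In = 0x1E = 0b00011110; LoLUT[0xE] =
// 0x70 (reversed low nibble in the high position) and HiLUT[0x1] = 0x08
// (reversed high nibble in the low position), so OR gives 0x78 =
// 0b01111000 = bitreverse(0x1E).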
SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
for (unsigned i = 0; i < NumElts; ++i) {
LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
}
SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
NewOpc = X86ISD::LADD;
break;
case ISD::ATOMIC_LOAD_SUB:
NewOpc = X86ISD::LSUB;
break;
case ISD::ATOMIC_LOAD_OR:
NewOpc = X86ISD::LOR;
break;
case ISD::ATOMIC_LOAD_XOR:
NewOpc = X86ISD::LXOR;
break;
case ISD::ATOMIC_LOAD_AND:
NewOpc = X86ISD::LAND;
break;
default:
llvm_unreachable("Unknown ATOMIC_LOAD_ opcode");
}
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
/*MemVT=*/N->getSimpleValueType(0), MMO);
}
/// Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
unsigned Opc = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
// We can lower atomic_load_add into LXADD. However, any other atomicrmw op
// can only be lowered when the result is unused. They should have already
// been transformed into a cmpxchg loop in AtomicExpand.
if (N->hasAnyUseOfValue(0)) {
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
if (Opc == ISD::ATOMIC_LOAD_SUB) {
AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
RHS, AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
return N;
}
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
return SDValue();
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
// FIXME: On 32-bit, store -> fist or movq would be more efficient
// (The only way to get a 16-byte store is cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
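// E.g. a seq_cst "mov [p], reg" would still need a trailing fence, whereas
// "xchg [p], reg" has an implicit LOCK prefix and acts as a full barrier,
// so the swap's returned value can simply be discarded.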
if (cast<AtomicSDNode>(Node)->getOrdering() ==
AtomicOrdering::SequentiallyConsistent ||
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
cast<AtomicSDNode>(Node)->getMemOperand());
return Swap.getValue(1);
}
// Other atomic stores have a simple pattern.
return Op;
}
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDLoc DL(N);
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getConstant(NegOne, DL, CarryVT));
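// Note: ADD Carry, -1 produces a carry-out iff Carry != 0 (c + 0xFF..FF
// wraps for any c >= 1), so this re-materializes the boolean carry input
// into EFLAGS.CF, where ADC/SBB expect it.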
unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
Op.getOperand(1), Carry.getValue(1));
SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
// { double, double } (which is returned in XMM0, XMM1).
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Callee =
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
: (Type *)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
return CallResult.first;
// Returned in bits 0:31 and 32:63 of xmm0.
SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(0, dl));
SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(1, dl));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
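/// E.g. ExtendToType(<a, b> : v2i32, v4i32) yields <a, b, undef, undef>,
/// or <a, b, 0, 0> when FillWithZeroes is set.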
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
bool FillWithZeroes = false) {
// Check if InOp already has the right width.
MVT InVT = InOp.getSimpleValueType();
if (InVT == NVT)
return InOp;
if (InOp.isUndef())
return DAG.getUNDEF(NVT);
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = NVT.getVectorNumElements();
assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
"Unexpected request for vector widening");
SDLoc dl(InOp);
if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
InOp.getNumOperands() == 2) {
SDValue N1 = InOp.getOperand(1);
if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
N1.isUndef()) {
InOp = InOp.getOperand(0);
InVT = InOp.getSimpleValueType();
InNumElts = InVT.getVectorNumElements();
}
}
if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i < InNumElts; ++i)
Ops.push_back(InOp.getOperand(i));
EVT EltVT = InOp.getOperand(0).getValueType();
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
Ops.push_back(FillVal);
return DAG.getBuildVector(NVT, dl, Ops);
}
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
DAG.getUNDEF(NVT);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
InOp, DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
// An X86 scatter kills the mask register, so its type should be added to
// the list of return values.
// If the "scatter" already has 2 return values, it has been handled.
if (Op.getNode()->getNumValues() == 2)
return Op;
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
SDValue Src = N->getValue();
MVT VT = Src.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
SDLoc dl(Op);
SDValue NewScatter;
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue Chain = N->getChain();
SDValue BasePtr = N->getBasePtr();
MVT MemVT = N->getMemoryVT().getSimpleVT();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
// The v2i32 value was promoted to v2i64.
// Now we "redo" the type legalizer's work and widen the original
// v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
// with a shuffle.
assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
"Unexpected memory type");
int ShuffleMask[] = {0, 2, -1, -1};
Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
DAG.getUNDEF(MVT::v4i32), ShuffleMask);
// Now we have 4 elements instead of 2.
// Expand the index.
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
Index = ExtendToType(Index, NewIndexVT, DAG);
// Expand the mask with zeroes. The mask may be <2 x i64> or <2 x i1> at
// this point.
assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
"Unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
VT = MVT::v4i32;
}
unsigned NumElts = VT.getVectorNumElements();
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors; either the data or the index
// must be 512 bits wide. If both the index and data are currently
// 256-bit but the vector contains 8 elements, we just sign-extend the
// index.
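// E.g. a v4i32 scatter with a v4i32 index on plain AVX512F is widened to
// 8 lanes: index, data, and mask are all extended, with the mask padded
// with zeroes so the four extra lanes are inactive and store nothing.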
if (IndexVT == MVT::v8i32)
// Just extend index
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
else {
// The minimum number of elements in a scatter is 8
NumElts = 8;
// Index
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
// Use original index here, do not modify the index twice
Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
if (IndexVT.getScalarType() == MVT::i32)
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
// Mask
// At this point the mask operand has been promoted
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
// Use the original mask here, do not modify the mask twice
Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
// The value that should be stored
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src = ExtendToType(Src, NewVT, DAG);
}
}
// If the mask is "wide" at this point, truncate it to an i1 vector
MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
// The mask is killed by the scatter; add it to the result values
SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
N->getMemOperand());
DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
return SDValue(NewScatter.getNode(), 1);
}
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
SDLoc dl(Op);
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
"Expanding masked load is supported on AVX-512 target only!");
assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
"Expanding masked load is supported for 32 and 64-bit types only!");
// Non-expanding loads of 4x32, 4x64 and 2x64 vectors are legal regardless
// of VLX; expanding loads of those types are handled below.
if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4)
return Op;
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked load op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits
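// E.g. a v8f32 masked load on AVX512F without VLX is widened to v16f32;
// the mask is zero-padded so the extra lanes are inactive, and the low
// 8 lanes are extracted from the wide result afterwards.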
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
SDValue Src0 = N->getSrc0();
Src0 = ExtendToType(Src0, WideDataVT, DAG);
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
"We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
if (MaskEltTy != MVT::i1)
Mask = DAG.getNode(ISD::TRUNCATE, dl,
MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
N->getBasePtr(), Mask, Src0,
N->getMemoryVT(), N->getMemOperand(),
N->getExtensionType(),
N->isExpandingLoad());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewLoad.getValue(0),
DAG.getIntPtrConstant(0, dl));
SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
SDValue DataToStore = N->getValue();
MVT VT = DataToStore.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
SDLoc dl(Op);
assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
"Compressing masked store is supported on AVX-512 target only!");
assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
"Compressing masked store is supported for 32 and 64-bit types only!");
// 4x32 and 2x64 vectors of non-compressing stores are legal regardless of VLX.
if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4)
return Op;
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked store op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked store op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
"We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
if (MaskEltTy != MVT::i1)
Mask = DAG.getNode(ISD::TRUNCATE, dl,
MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
Mask, N->getMemoryVT(), N->getMemOperand(),
N->isTruncatingStore(), N->isCompressingStore());
}
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue Src0 = N->getValue();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors; either the data or the index
// must be 512 bits wide. If both the index and data are currently
// 256-bit but the vector contains 8 elements, we just sign-extend the
// index.
if (NumElts == 8) {
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), Index };
DAG.UpdateNodeOperands(N, Ops);
return Op;
}
// The minimum number of elements in a gather is 8
NumElts = 8;
// Index
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
Index = ExtendToType(Index, NewIndexVT, DAG);
if (IndexVT.getScalarType() == MVT::i32)
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
// Mask
MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
// At this point the mask operand has been promoted
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
// The pass-through value
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src0 = ExtendToType(Src0, NewVT, DAG);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewGather.getValue(0),
DAG.getIntPtrConstant(0, dl));
SDValue RetOps[] = {Extract, NewGather.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
// There is a special case when the return type v2i32 is illegal and
// the type legalizer extended it to v2i64. Without this conversion we end up
// with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
// In order to avoid this situation, we'll build an X86 specific Gather node
// with index v2i64 and value type v4i32.
assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
"Unexpected type in masked gather");
Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
DAG.getBitcast(MVT::v4i32, Src0),
DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
// The mask should match the destination type. Extending the mask with
// zeroes is not necessary since the instruction itself reads only two
// values from memory.
Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
N->getMemOperand());
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
NewGather.getValue(0), DAG);
SDValue RetOps[] = { Sext, NewGather.getValue(1) };
return DAG.getMergeValues(RetOps, dl);
}
if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) {
// This transformation is for optimization only.
// The type legalizer extended the mask and index to 4-element vectors
// to match the requirements of the common gather node: the same vector
// width for index and value. The X86 gather node tolerates mismatched
// vector widths so that a more optimal instruction can be selected in
// the end.
assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 &&
"Unexpected type in masked gather");
if (Mask.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
Index.getOpcode() == ISD::CONCAT_VECTORS &&
Index.getOperand(1).isUndef()) {
Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
Index = Index.getOperand(0);
} else
return Op;
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
N->getMemOperand());
SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
return DAG.getMergeValues(RetOps, dl);
}
return Op;
}
SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
// no-ops in the case of a null GC strategy (or a GC strategy which does not
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
SDLoc OpDL(Op);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
return NOOP;
}
SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
// no-ops in the case of a null GC strategy (or a GC strategy which does not
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
SDLoc OpDL(Op);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
return NOOP;
}
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG);
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH:
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
return LowerGC_TRANSITION_START(Op, DAG);
case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG);
case ISD::STORE: return LowerTruncatingStore(Op, Subtarget, DAG);
}
}
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void X86TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
assert((N->getNumValues() <= Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place new result values based on N's result numbering.
// In some cases (LowerSINT_TO_FP, for example) Res has more result values
// than the original node; the chain (the last value) should be dropped.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case X86ISD::AVG: {
// Legalize types for X86ISD::AVG by expanding vectors.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
auto InVT = N->getValueType(0);
auto InVTSize = InVT.getSizeInBits();
const unsigned RegSize =
(InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
assert((Subtarget.hasBWI() || RegSize < 512) &&
"512-bit vector requires AVX512BW");
assert((Subtarget.hasAVX2() || RegSize < 256) &&
"256-bit vector requires AVX2");
auto ElemVT = InVT.getVectorElementType();
auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
RegSize / ElemVT.getSizeInBits());
assert(RegSize % InVT.getSizeInBits() == 0);
unsigned NumConcat = RegSize / InVT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
Ops[0] = N->getOperand(1);
SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
DAG.getIntPtrConstant(0, dl)));
return;
}
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(1), UNDEF);
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
return;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
if (N->getValueType(0) == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue Src = N->getOperand(0);
if (Src.getValueType() == MVT::v2f64) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI
: X86ISD::CVTTP2UI,
dl, MVT::v4i32, Src);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
if (Src.getValueType() == MVT::v2f32) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
// The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
// so early out here.
return;
}
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
if (StackSlot.getNode())
Results.push_back(
DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo()));
else
Results.push_back(FIST);
}
return;
}
case ISD::SINT_TO_FP: {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = N->getOperand(0);
if (N->getValueType(0) != MVT::v2f32 || Src.getValueType() != MVT::v2i64)
return;
Results.push_back(DAG.getNode(X86ISD::CVTSI2P, dl, MVT::v4f32, Src));
return;
}
case ISD::UINT_TO_FP: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
if (VT != MVT::v2f32)
return;
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
Results.push_back(DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v4f32, Src));
return;
}
if (SrcVT != MVT::v2i32)
return;
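// 0x4330000000000000 is the double 2^52. For 0 <= x < 2^32, OR-ing x into
// the low mantissa bits of 2^52 yields the double with value 2^52 + x
// exactly, so subtracting 2^52 recovers x as an exact v2f64, which is
// then rounded to v2f32 below.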
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
}
case ISD::FP_ROUND: {
if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
}
case ISD::FP_EXTEND: {
// Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
return;
}
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
case Intrinsic::x86_xgetbv:
return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
}
}
case ISD::INTRINSIC_WO_CHAIN: {
if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
Results.push_back(V);
return;
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
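// CMPXCHG8B/16B expect the compare value in EDX:EAX (RDX:RAX) and the
// desired value in ECX:EBX (RCX:RBX); on success ZF is set and the new
// value is stored, otherwise ZF is cleared and the current memory value
// is loaded back into EDX:EAX (RDX:RAX). The copies below set up that
// register protocol.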
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, dl, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(1, dl, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
Regs64bit ? X86::RDX : X86::EDX,
cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(0, dl, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(1, dl, HalfT));
swapInH =
DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
swapInH, cpInH.getValue(1));
// If the current function needs the base pointer, RBX,
// we shouldn't use cmpxchg directly: the lowering of that
// instruction clobbers RBX, and since RBX is a reserved
// register the register allocator will not ensure its value
// is properly saved and restored around this live range.
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
SDValue Result;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
unsigned BasePtr = TRI->getBaseRegister();
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
(BasePtr == X86::RBX || BasePtr == X86::EBX)) {
// ISel prefers the LCMPXCHG64 variant.
// If that assert breaks, that is no longer the case, and we
// need to teach LCMPXCHG8_SAVE_EBX_DAG how to save RBX, not
// just EBX. This is a matter of accepting i64 input for that
// pseudo, and restoring into the register of the right width
// in the expand pseudo. Everything else should just work.
assert(((Regs64bit == (BasePtr == X86::RBX)) || BasePtr == X86::EBX) &&
"Saving only half of RBX");
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_SAVE_RBX_DAG
: X86ISD::LCMPXCHG8_SAVE_EBX_DAG;
SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX,
HalfT, swapInH.getValue(1));
SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL,
RBXSave,
/*Glue*/ RBXSave.getValue(2)};
Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
} else {
unsigned Opcode =
Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG;
swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX, swapInL,
swapInH.getValue(1));
SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
swapInL.getValue(1)};
Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
}
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
Results.push_back(Success);
Results.push_back(EFLAGS.getValue(1));
return;
}
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
}
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0)->getValueType(0);
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
return;
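// E.g. for f64 -> v2i32: wrap the scalar as v2f64 <x, undef>, bitcast to
// v4i32, and rebuild the v2i32 result from elements 0 and 1, which hold
// the 64 bits of x on little-endian x86.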
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, N->getOperand(0));
SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
if (ExperimentalVectorWideningLegalization) {
// If we are legalizing vectors by widening, we already have the desired
// legal vector type, just return it.
Results.push_back(ToVecInt);
return;
}
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
ToVecInt, DAG.getIntPtrConstant(i, dl)));
Results.push_back(DAG.getBuildVector(DstVT, dl, Elts));
}
}
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
case X86ISD::FIRST_NUMBER: break;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCC: return "X86ISD::FSETCC";
case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::IRET: return "X86ISD::IRET";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
case X86ISD::ADDUS: return "X86ISD::ADDUS";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAXS: return "X86ISD::FMAXS";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMAXS_RND: return "X86ISD::FMAXS_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMINS: return "X86ISD::FMINS";
case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_SJLJ_SETUP_DISPATCH:
return "X86ISD::EH_SJLJ_SETUP_DISPATCH";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
case X86ISD::LCMPXCHG8_SAVE_EBX_DAG:
return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG";
case X86ISD::LCMPXCHG16_SAVE_RBX_DAG:
return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG";
case X86ISD::LADD: return "X86ISD::LADD";
case X86ISD::LSUB: return "X86ISD::LSUB";
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::VSRA: return "X86ISD::VSRA";
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::VPPERM: return "X86ISD::VPPERM";
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::SMUL8: return "X86ISD::SMUL8";
case X86ISD::UMUL8: return "X86ISD::UMUL8";
case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::VALIGN: return "X86ISD::VALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
case X86ISD::SHUFP: return "X86ISD::SHUFP";
case X86ISD::SHUF128: return "X86ISD::SHUF128";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";
case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::MFENCE: return "X86ISD::MFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::VPROT: return "X86ISD::VPROT";
case X86ISD::VPROTI: return "X86ISD::VPROTI";
case X86ISD::VPSHA: return "X86ISD::VPSHA";
case X86ISD::VPSHL: return "X86ISD::VPSHL";
case X86ISD::VPCOM: return "X86ISD::VPCOM";
case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";
case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";
case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND";
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND";
case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND";
case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND";
case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND";
case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND";
case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND";
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
case X86ISD::VREDUCES: return "X86ISD::VREDUCES";
case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP28: return "X86ISD::RCP28";
case X86ISD::RCP28S: return "X86ISD::RCP28S";
case X86ISD::EXP2: return "X86ISD::EXP2";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
case X86ISD::MGATHER: return "X86ISD::MGATHER";
}
return nullptr;
}
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
return false;
if (AM.BaseGV) {
unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV);
// If a reference to this global requires an extra load, we can't fold it.
if (isGlobalStubReference(GVFlags))
return false;
// If BaseGV requires a register for the PIC base, we cannot also have a
// BaseReg specified.
if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
if ((M != CodeModel::Small || isPositionIndependent()) &&
Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
// These scales always work.
break;
case 3:
case 5:
case 9:
// These scales are formed with basereg+scalereg. Only accept if there is
// no basereg yet.
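// E.g. Scale == 3 is encoded as reg + reg*2 ("lea (%rax,%rax,2), ..."),
// consuming both the base and index slots.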
if (AM.HasBaseReg)
return false;
break;
default: // Other stuff never works.
return false;
}
return true;
}
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
unsigned Bits = Ty->getScalarSizeInBits();
// 8-bit shifts are always expensive, but versions with a scalar amount aren't
// particularly cheaper than those without.
if (Bits == 8)
return false;
// On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make
// variable shifts just as cheap as scalar ones.
if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64))
return false;
// Otherwise, it's significantly cheaper to shift by a scalar amount than by a
// fully general vector.
return true;
}
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
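// E.g. "movl %edi, %eax" clears bits 63:32 of RAX, so the i64 zext of an
// i32 value already in a register costs nothing.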
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2))
return true;
if (Val.getOpcode() != ISD::LOAD)
return false;
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
// X86 has 8, 16, and 32-bit zero-extending loads.
return true;
}
return false;
}
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
if (!Subtarget.hasAnyFMA())
return false;
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (!VT.isSimple())
return false;
// Not for i1 vectors
if (VT.getSimpleVT().getScalarType() == MVT::i1)
return false;
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSimpleVT().getSizeInBits() == 64)
return false;
// We only care that the types being shuffled are legal. The lowering can
// handle any possible shuffle mask that results.
return isTypeLegal(VT.getSimpleVT());
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
// Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(Mask, VT);
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
DebugLoc DL = MI.getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// For v = xbegin(), we generate
//
// thisMBB:
// xbegin fallMBB
//
// mainMBB:
// s0 = -1
//
// fallMBB:
// eax = # XABORT_DEF
// s1 = eax
//
// sinkMBB:
// v = phi(s0/mainMBB, s1/fallMBB)
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, fallMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned fallDstReg = MRI.createVirtualRegister(RC);
// thisMBB:
// xbegin fallMBB
// # fallthrough to mainMBB
// # abort path branches to fallMBB
BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(fallMBB);
// mainMBB:
// mainDstReg := -1
BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
// fallMBB:
// ; pseudo instruction to model hardware's definition from XABORT
// EAX := XABORT_DEF
// fallDstReg := EAX
BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
fallMBB->addSuccessor(sinkMBB);
// sinkMBB:
// DstReg := phi(mainDstReg/mainMBB, fallDstReg/fallMBB)
BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(fallDstReg).addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
}
// FIXME: When we get size-specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX, all of this code can be replaced with that
// in the .td file.
static MachineBasicBlock *emitPCMPSTRM(MachineInstr &MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
}
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI.getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::XMM0);
MI.eraseFromParent();
return BB;
}
// FIXME: Custom handling because TableGen doesn't support multiple implicit
// defs in an instruction pattern
static MachineBasicBlock *emitPCMPSTRI(MachineInstr &MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
}
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI.getNumOperands(); // operand 0 is the result; skipped below
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::ECX);
MI.eraseFromParent();
return BB;
}
static MachineBasicBlock *emitWRPKRU(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert input VAL into EAX
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(0).getReg());
// insert zero into ECX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
// insert zero into EDX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::EDX);
// insert WRPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert zero into ECX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
// insert RDPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::EAX);
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget,
unsigned Opc) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Address into RAX/EAX, other two args into ECX, EDX.
unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
unsigned ValOps = X86::AddrNumOperands;
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(ValOps).getReg());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
.addReg(MI.getOperand(ValOps + 1).getReg());
// The instruction itself takes no explicit operands; its inputs are implicit.
BuildMI(*BB, MI, dl, TII->get(Opc));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Address into RAX/EAX
unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI->getOperand(i));
// The instruction itself takes no explicit operands; its inputs are implicit.
BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
// 0 ) Output : destination address (reg)
// 1-5) Input : va_list address (addr, i64mem)
// 6 ) ArgSize : Size (in bytes) of vararg type
// 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
assert(MI.getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
static_assert(X86::AddrNumOperands == 5,
"VAARG_64 assumes 5 address operands");
unsigned DestReg = MI.getOperand(0).getReg();
MachineOperand &Base = MI.getOperand(1);
MachineOperand &Scale = MI.getOperand(2);
MachineOperand &Index = MI.getOperand(3);
MachineOperand &Disp = MI.getOperand(4);
MachineOperand &Segment = MI.getOperand(5);
unsigned ArgSize = MI.getOperand(6).getImm();
unsigned ArgMode = MI.getOperand(7).getImm();
unsigned Align = MI.getOperand(8).getImm();
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
DebugLoc DL = MI.getDebugLoc();
// struct va_list {
// i32 gp_offset
// i32 fp_offset
// i64 overflow_area (address)
// i64 reg_save_area (address)
// }
// sizeof(va_list) = 24
// alignment(va_list) = 8
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
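// e.g. with fp_offset in use: MaxOffset = 6 * 8 + 8 * 16 = 176 bytes.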
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
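// e.g. ArgSize = 12 rounds up to ArgSizeA8 = 16.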
bool NeedsAlign = (Align > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// If we only pull from the overflow region, we don't create a branch;
// there's no need to alter control flow.
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
offsetMBB = nullptr;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// First emit code to check if gp_offset (or fp_offset) is below the bound.
// If so, pull the argument from reg_save_area. (branch to offsetMBB)
// If not, pull from overflow_area. (branch to overflowMBB)
//
// thisMBB
// | .
// | .
// offsetMBB overflowMBB
// | .
// | .
// endMBB
// Registers for the PHI in endMBB
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++MBB->getIterator();
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
// endMBB is a successor of both offsetMBB and overflowMBB
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
.addMBB(overflowMBB);
}
// In offsetMBB, emit code to use the reg_save_area.
if (offsetMBB) {
assert(OffsetReg != 0);
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
// Compute the offset for the next argument
unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
.setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
//
// Emit code to use overflow area
//
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
assert(isPowerOf2_32(Align) && "Alignment must be a power of 2");
unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
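// e.g. Align = 16: addr 0x1001 -> (0x1001 + 15) & ~15 = 0x1010.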
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
.addReg(OverflowAddrReg)
.addImm(Align-1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Align-1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
.setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// Erase the pseudo instruction
MI.eraseFromParent();
return endMBB;
}
MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
// Emit code to save XMM registers to the stack. The ABI says that the
// number of registers to save is given in %al, so it's theoretically
// possible to do an indirect jump trick to avoid saving all of them;
// however, this code takes a simpler approach and just executes all
// of the stores if %al is non-zero. It's less code, it's probably
// easier on the hardware branch predictor, and stores aren't all that
// expensive anyway.
// Create the new basic blocks. One block contains all the XMM stores,
// and one block is the final destination regardless of whether any
// stores were performed.
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
MachineFunction::iterator MBBIter = ++MBB->getIterator();
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
// Transfer the remainder of MBB and its successor edges to EndMBB.
EndMBB->splice(EndMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
unsigned CountReg = MI.getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI.getOperand(1).getImm();
int64_t VarArgsFPOffset = MI.getOperand(2).getImm();
if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
// Make sure the last operand is EFLAGS, which gets clobbered by the branch
// that was just emitted, but clearly shouldn't be "saved".
assert((MI.getNumOperands() <= 3 ||
!MI.getOperand(MI.getNumOperands() - 1).isReg() ||
MI.getOperand(MI.getNumOperands() - 1).getReg() == X86::EFLAGS) &&
"Expected last argument to be EFLAGS");
unsigned MOVOpc = Subtarget.hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
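// (The VEX-encoded form is used when AVX is available to avoid SSE/AVX
// transition penalties.)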
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI.getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO = F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
.addImm(/*Disp=*/Offset)
.addReg(/*Segment=*/0)
.addReg(MI.getOperand(i).getReg())
.addMemOperand(MMO);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
// The EFLAGS operand of SelectItr might be missing a kill marker
// because there were multiple uses of EFLAGS, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
// Scan forward through BB for a use/def of EFLAGS.
MachineBasicBlock::iterator miI(std::next(SelectItr));
for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
const MachineInstr& mi = *miI;
if (mi.readsRegister(X86::EFLAGS))
return false;
if (mi.definesRegister(X86::EFLAGS))
break; // Should have kill-flag - update below.
}
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
if (miI == BB->end()) {
for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
sEnd = BB->succ_end();
sItr != sEnd; ++sItr) {
MachineBasicBlock* succ = *sItr;
if (succ->isLiveIn(X86::EFLAGS))
return false;
}
}
// We found a def, or hit the end of the basic block and EFLAGS wasn't live
// out. SelectMI should have a kill flag on EFLAGS.
SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
return true;
}
// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
// together with other CMOV pseudo-opcodes into a single basic-block with
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return true;
default:
return false;
}
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
// This code lowers all pseudo-CMOV instructions. Generally it lowers these
// as described above, by inserting a BB, and then making a PHI at the join
// point to select the true and false operands of the CMOV in the PHI.
//
// The code also handles two different cases of multiple CMOV opcodes
// in a row.
//
// Case 1:
// In this case, there are multiple CMOVs in a row, all which are based on
// the same condition setting (or the exact opposite condition setting).
// In this case we can lower all the CMOVs using a single inserted BB, and
// then make a number of PHIs at the join point to model the CMOVs. The only
// trickiness here is that in a case like:
//
// t2 = CMOV cond1 t1, f1
// t3 = CMOV cond1 t2, f2
//
// when rewriting this into PHIs, we have to perform some renaming on the
// temps since you cannot have a PHI operand refer to a PHI result earlier
// in the same block. The "simple" but wrong lowering would be:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t2(BB1), f2(BB2)
//
// but clearly t2 is not defined in BB1, so that is incorrect. The proper
// renaming is to note that on the path through BB1, t2 is really just a
// copy of t1, and do that renaming, properly generating:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t1(BB1), f2(BB2)
//
// In case 2, we lower cascaded CMOVs such as
//
// (CMOV (CMOV F, T, cc1), T, cc2)
//
// to two successive branches. For that, we look for another CMOV as the
// following instruction.
//
// Without this, we would add a PHI between the two jumps, which ends up
// creating a few copies all around. For instance, for
//
// (sitofp (zext (fcmp une)))
//
// we would generate:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// movaps %xmm0, %xmm1
// jne .LBB5_2
// xorps %xmm1, %xmm1
// .LBB5_2:
// jp .LBB5_4
// movaps %xmm1, %xmm0
// .LBB5_4:
// retq
//
// because this custom-inserter would have generated:
//
// A
// | \
// | B
// | /
// C
// | \
// | D
// | /
// E
//
// A: X = ...; Y = ...
// B: empty
// C: Z = PHI [X, A], [Y, B]
// D: empty
// E: PHI [X, C], [Z, D]
//
// If we lower both CMOVs in a single step, we can instead generate:
//
// A
// | \
// | C
// | /|
// |/ |
// | |
// | D
// | /
// E
//
// A: X = ...; Y = ...
// D: empty
// E: PHI [X, A], [X, C], [Y, D]
//
// Which, in our sitofp/fcmp example, gives us something like:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// jne .LBB5_4
// jp .LBB5_4
// xorps %xmm0, %xmm0
// .LBB5_4:
// retq
//
MachineInstr *CascadedCMOV = nullptr;
MachineInstr *LastCMOV = &MI;
X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
// Check for case 1, where there are multiple CMOVs with the same condition
// first. Of the two cases of multiple CMOV lowerings, case 1 reduces the
// number of jumps the most.
if (isCMOVPseudo(MI)) {
// See if we have a string of CMOVS with the same condition.
while (NextMIIt != BB->end() && isCMOVPseudo(*NextMIIt) &&
(NextMIIt->getOperand(3).getImm() == CC ||
NextMIIt->getOperand(3).getImm() == OppCC)) {
LastCMOV = &*NextMIIt;
++NextMIIt;
}
}
// Check for case 2, but only if we didn't already find case 1, as indicated
// by LastCMOV still pointing at MI.
if (LastCMOV == &MI && NextMIIt != BB->end() &&
NextMIIt->getOpcode() == MI.getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
NextMIIt->getOperand(1).isKill()) {
CascadedCMOV = &*NextMIIt;
}
MachineBasicBlock *jcc1MBB = nullptr;
// If we have a cascaded CMOV, we lower it to two successive branches to
// the same block. EFLAGS is used by both, so mark it as live in the second.
if (CascadedCMOV) {
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, jcc1MBB);
jcc1MBB->addLiveIn(X86::EFLAGS);
}
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
if (CascadedCMOV) {
// The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
BB->addSuccessor(jcc1MBB);
// In that case, jcc1MBB will itself either fall through to copy0MBB or
// jump to the sinkMBB.
jcc1MBB->addSuccessor(copy0MBB);
jcc1MBB->addSuccessor(sinkMBB);
} else {
BB->addSuccessor(copy0MBB);
}
// The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
unsigned Opc = X86::GetCondBranchFromCond(CC);
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
if (CascadedCMOV) {
unsigned Opc2 = X86::GetCondBranchFromCond(
(X86::CondCode)CascadedCMOV->getOperand(3).getImm());
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
}
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
copy0MBB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
MachineBasicBlock::iterator MIItEnd =
std::next(MachineBasicBlock::iterator(LastCMOV));
MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
MachineInstrBuilder MIB;
// As we are creating the PHIs, we have to be careful if there is more than
// one. Later CMOVs may reference the results of earlier CMOVs, but later
// PHIs have to reference the individual true/false inputs from earlier PHIs.
// That also means that PHI construction must work forward from earlier to
// later, and that the code must maintain a mapping from each earlier PHI's
// destination register to the registers that went into that PHI.
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
unsigned DestReg = MIIt->getOperand(0).getReg();
unsigned Op1Reg = MIIt->getOperand(1).getReg();
unsigned Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
// PHI that is going to be generated.
if (MIIt->getOperand(3).getImm() == OppCC)
std::swap(Op1Reg, Op2Reg);
if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
Op1Reg = RegRewriteTable[Op1Reg].first;
if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
Op2Reg = RegRewriteTable[Op2Reg].second;
MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
TII->get(X86::PHI), DestReg)
.addReg(Op1Reg).addMBB(copy0MBB)
.addReg(Op2Reg).addMBB(thisMBB);
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
}
// If we have a cascaded CMOV, the second Jcc provides the same incoming
// value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
if (CascadedCMOV) {
MIB.addReg(MI.getOperand(2).getReg()).addMBB(jcc1MBB);
// Copy the PHI result to the register defined by the second CMOV.
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
DL, TII->get(TargetOpcode::COPY),
CascadedCMOV->getOperand(0).getReg())
.addReg(MI.getOperand(0).getReg());
CascadedCMOV->eraseFromParent();
}
// Now remove the CMOV(s).
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
(MIIt++)->eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Combine the following atomic floating-point modification pattern:
// a.store(reg OP a.load(acquire), release)
// Transform it into:
// OPss (%gpr), %xmm
// movss %xmm, (%gpr)
// Or sd equivalent for 64-bit operations.
unsigned MOp, FOp;
switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
case X86::RELEASE_FADD32mr:
FOp = X86::ADDSSrm;
MOp = X86::MOVSSmr;
break;
case X86::RELEASE_FADD64mr:
FOp = X86::ADDSDrm;
MOp = X86::MOVSDmr;
break;
}
const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned ValOpIdx = X86::AddrNumOperands;
unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(FOp),
MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
.addReg(VSrc);
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand &Operand = MI.getOperand(i);
// Clear any kill flags on register operands as we'll create a second
// instruction using the same address operands.
if (Operand.isReg())
Operand.setIsKill(false);
MIB.add(Operand);
}
MachineInstr *FOpMI = MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
const bool Is64Bit = Subtarget.is64Bit();
const bool IsLP64 = Subtarget.isTarget64BitLP64();
const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
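// TLS slot holding the stack limit for segmented stacks; the three offsets
// correspond to LP64, 64-bit ILP32, and 32-bit targets respectively.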
// BB:
// ... [Till the alloca]
// If stacklet is not large enough, jump to mallocMBB
//
// bumpMBB:
// Allocate by subtracting from RSP
// Jump to continueMBB
//
// mallocMBB:
// Allocate by call to runtime
//
// continueMBB:
// ...
// [rest of original BB]
//
MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MF->getDataLayout()));
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI.getOperand(1).getReg(),
physSPReg =
IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
continueMBB->splice(continueMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
BB->addSuccessor(mallocMBB);
mallocMBB->addSuccessor(continueMBB);
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
MI.getOperand(0).getReg())
.addReg(mallocPtrVReg)
.addMBB(mallocMBB)
.addReg(bumpSPPtrVReg)
.addMBB(bumpMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
// And we're done.
return continueMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
DebugLoc DL = MI.getDebugLoc();
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
"SEH does not use catchret!");
// Only 32-bit EH needs to worry about manually restoring stack pointers.
if (!Subtarget.is32Bit())
return BB;
// C++ EH creates a new target block to hold the restore code, and wires up
// the new block to the return destination with a normal JMP_4.
MachineBasicBlock *RestoreMBB =
MF->CreateMachineBasicBlock(BB->getBasicBlock());
assert(BB->succ_size() == 1);
MF->insert(std::next(BB->getIterator()), RestoreMBB);
RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RestoreMBB);
MI.getOperand(0).setMBB(RestoreMBB);
auto RestoreMBBI = RestoreMBB->begin();
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchPad(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const Constant *PerFn = MF->getFunction()->getPersonalityFn();
bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
// Only 32-bit SEH requires special handling for catchpad.
if (IsSEH && Subtarget.is32Bit()) {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
}
MI.eraseFromParent();
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const {
// So, here we replace TLSADDR with the sequence:
// adjust_stackdown -> TLSADDR -> adjust_stackup.
// We need this because TLSADDR is lowered into a call
// inside MC; without the two markers, shrink-wrapping
// may push the prologue/epilogue past them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
// Emit CALLSEQ_END right after the instruction.
// We don't call erase from parent because we want to keep the
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We're taking the value that we received from
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
Subtarget.is64Bit() ?
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
unsigned DstReg;
unsigned MemOpndSlot = 0;
unsigned CurOp = 0;
DstReg = MI.getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
(void)TRI;
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
MemOpndSlot = CurOp;
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
// restoreMBB:
// if base pointer being used, load it from frame
// v_restore = 1
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
restoreMBB->setHasAddressTaken();
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
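// The resume address goes in the buffer's second pointer-sized slot; the
// matching longjmp below reloads FP, IP, and SP from slots 0, 1, and 2.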
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
// Prepare IP either in reg or imm.
if (!UseImmLabel) {
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addMBB(restoreMBB)
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
.addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.add(MI.getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
// mainMBB:
// EAX = 0
BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
const bool Uses64BitFramePtr =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
unsigned FramePtr = RegInfo->getFrameRegister(*MF);
unsigned BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI.eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as a GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
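// Buffer layout as consumed here: slot 0 = saved FP, slot 1 = resume IP,
// slot 2 = saved SP.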
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), LabelOffset);
else
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return MBB;
}
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
unsigned Op = 0;
unsigned VR = 0;
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
if (UseImmLabel) {
Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
} else {
const TargetRegisterClass *TRC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
VR = MRI->createVirtualRegister(TRC);
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
if (Subtarget.is64Bit())
BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addMBB(DispatchBB)
.addReg(0);
else
BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
.addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
.addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
addFrameReference(MIB, FI, 36);
if (UseImmLabel)
MIB.addMBB(DispatchBB);
else
MIB.addReg(VR);
}
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
int FI = MFI.getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (auto &MBB : *MF) {
if (!MBB.isEHPad())
continue;
MCSymbol *Sym = nullptr;
for (const auto &MI : MBB) {
if (MI.isDebugValue())
continue;
assert(MI.isEHLabel() && "expected EH_LABEL");
Sym = MI.getOperand(0).getMCSymbol();
break;
}
if (!MF->hasCallSiteLandingPad(Sym))
continue;
for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
CallSiteNumToLPad[CSI].push_back(&MBB);
MaxCSNum = std::max(MaxCSNum, CSI);
}
}
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock *> LPadList;
SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
for (auto &LP : CallSiteNumToLPad[CSI]) {
LPadList.push_back(LP);
InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
}
}
assert(!LPadList.empty() &&
"No landing pad destinations for the dispatch jump table!");
// Create the MBBs for the dispatch code.
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
DispatchBB->setIsEHPad(true);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(X86::TRAP));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
// Insert MBBs.
MF->push_back(DispatchBB);
MF->push_back(DispContBB);
MF->push_back(TrapBB);
// Insert code into the entry block that creates and registers the function
// context.
SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI);
// Create the jump table and associated information
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(getJumpTableEncoding());
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
const X86RegisterInfo &RI = TII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered.
if (RI.hasBasePointer(*MF)) {
const bool FPIs64Bit =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setRestoreBasePointer(MF);
unsigned FP = RI.getFrameRegister(*MF);
unsigned BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
.addRegMask(RI.getNoPreservedMask());
} else {
BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
.addRegMask(RI.getNoPreservedMask());
}
unsigned IReg = MRI->createVirtualRegister(&X86::GR32RegClass);
addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
4);
BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
BuildMI(DispatchBB, DL, TII->get(X86::JA_1)).addMBB(TrapBB);
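// Call-site indices are 1-based; subtract one to form the 0-based
// jump-table index.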
unsigned JReg = MRI->createVirtualRegister(&X86::GR32RegClass);
BuildMI(DispContBB, DL, TII->get(X86::SUB32ri), JReg)
.addReg(IReg)
.addImm(1);
BuildMI(DispContBB, DL,
TII->get(Subtarget.is64Bit() ? X86::JMP64m : X86::JMP32m))
.addReg(0)
.addImm(Subtarget.is64Bit() ? 8 : 4)
.addReg(JReg)
.addJumpTableIndex(MJTI)
.addReg(0);
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
for (auto &LP : LPadList)
if (SeenMBBs.insert(LP).second)
DispContBB->addSuccessor(LP);
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
// Keep a copy of Successors since it's modified inside the loop.
SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
MBB->succ_rend());
// FIXME: Avoid quadratic complexity.
for (auto MBBS : Successors) {
if (MBBS->isEHPad()) {
MBB->removeSuccessor(MBBS);
MBBLPads.push_back(MBBS);
}
}
MBB->addSuccessor(DispatchBB);
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
// moving instructions to before the EH block, where they will never be
// executed.
for (auto &II : reverse(*MBB)) {
if (!II.isCall())
continue;
DenseMap<unsigned, bool> DefRegs;
for (auto &MOp : II.operands())
if (MOp.isReg())
DefRegs[MOp.getReg()] = true;
MachineInstrBuilder MIB(*MF, &II);
for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
unsigned Reg = SavedRegs[RI];
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
for (auto &LP : MBBLPads)
LP->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
return BB;
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:
return EmitLoweredCatchPad(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_FR128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
unsigned PushF =
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
// Permit reads of the FLAGS register without it being defined.
// This intrinsic exists to read external processor state in flags, such as
// the trap flag, interrupt flag, and direction flag, none of which are
// modeled by the backend.
Push->getOperand(2).setIsUndef();
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
unsigned Push =
MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(PopF));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::RELEASE_FADD32mr:
case X86::RELEASE_FADD64mr:
return EmitLoweredAtomicFP(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
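// A 2-byte, 2-byte-aligned stack slot to hold the x87 control word.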
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the control word...
unsigned OldCW =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the rounding mode to round toward zero...
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
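// 0xC7F keeps the exception mask bits set and forces the rounding-control
// field (bits 11:10) to 11b, i.e. round toward zero.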
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
.addReg(MI.getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return emitPCMPSTRM(MI, BB, Subtarget.getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
case X86::VPCMPISTRIREG:
case X86::PCMPISTRIMEM:
case X86::VPCMPISTRIMEM:
case X86::PCMPESTRIREG:
case X86::VPCMPESTRIREG:
case X86::PCMPESTRIMEM:
case X86::VPCMPESTRIMEM:
assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return emitPCMPSTRI(MI, BB, Subtarget.getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
case X86::MONITORX:
return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
// Cache line zero
case X86::CLZERO:
return emitClzero(&MI, BB, Subtarget);
// PKU feature
case X86::WRPKRU:
return emitWRPKRU(MI, BB, Subtarget);
case X86::RDPKRU:
return emitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
return emitXBegin(MI, BB, Subtarget.getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
case X86::EH_SjLj_SetJmp32:
case X86::EH_SjLj_SetJmp64:
return emitEHSjLjSetJmp(MI, BB);
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
case X86::Int_eh_sjlj_setup_dispatch:
return EmitSjLjDispatchBlock(MI, BB);
case TargetOpcode::STATEPOINT:
// As an implementation detail, STATEPOINT shares the STACKMAP format at
// this point in the process. We diverge later.
return emitPatchPoint(MI, BB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case TargetOpcode::PATCHABLE_EVENT_CALL:
// Do nothing here, handle in xray instrumentation pass.
return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
// In addition to the four E[ABCD] registers implied by its encoding,
// CMPXCHG8B requires a memory operand. If the current architecture is
// i686 and the current function needs a base pointer - which is ESI on
// i686 - the register allocator cannot allocate registers for an address
// of the form X(%reg, %reg, Y): there would never be enough unreserved
// registers during regalloc (without the base pointer, the only option
// would be X(%edi, %esi, Y)).
// We give the register allocator a hand by precomputing the address in
// a new vreg using LEA.
// If this is not i686 or there is no base pointer, there is nothing to do.
if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
return BB;
// Even though this code does not necessarily need the base pointer to
// be ESI, we check for that. The reason: if this assert fails, something
// has changed in the compiler's base pointer handling, and it most
// probably has to be addressed here as well.
assert(TRI->getBaseRegister() == X86::ESI &&
"LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
"base pointer in mind");
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT SPTy = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
X86AddressMode AM = getAddressFromInstr(&MI, 0);
// Regalloc does not need any help when the memory operand of CMPXCHG8B
// does not use an index register.
if (AM.IndexReg == X86::NoRegister)
return BB;
// After X86TargetLowering::ReplaceNodeResults, CMPXCHG8B is glued to its
// four operand definitions that are E[ABCD] registers. We skip them and
// then insert the LEA.
MachineBasicBlock::iterator MBBI(MI);
while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
--MBBI;
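// MBBI now points above the glued E[ABCD] copies; the LEA goes in before
// them so the address is available when CMPXCHG8B executes.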
addFullAddress(
BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
return BB;
}
case X86::LCMPXCHG16B:
return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
MI.getOpcode() == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
if (!BB->isLiveIn(BasePtr))
BB->addLiveIn(BasePtr);
return BB;
}
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
Known.Zero.setBitsFrom(1);
break;
case X86ISD::MOVMSK: {
unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
Known.Zero.setBitsFrom(NumLoBits);
break;
}
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
Known.setAllZero();
break;
}
DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
unsigned ShAmt = ShiftImm->getZExtValue();
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
} else {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
}
}
break;
}
case X86ISD::VZEXT: {
SDValue N0 = Op.getOperand(0);
unsigned NumElts = VT.getVectorNumElements();
EVT SrcVT = N0.getValueType();
unsigned InNumElts = SrcVT.getVectorNumElements();
unsigned InBitWidth = SrcVT.getScalarSizeInBits();
assert(InNumElts >= NumElts && "Illegal VZEXT input");
Known = KnownBits(InBitWidth);
APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBitWidth);
break;
}
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
unsigned VTBits = Op.getScalarValueSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SETCC_CARRY:
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
case X86ISD::VSEXT: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
Tmp += VTBits - Src.getScalarValueSizeInBits();
return Tmp;
}
case X86ISD::VSHLI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
if (ShiftVal.uge(VTBits))
return VTBits; // Shifted all bits out --> zero.
if (ShiftVal.uge(Tmp))
return 1; // Shifted all sign bits out --> unknown.
return Tmp - ShiftVal.getZExtValue();
}
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
ShiftVal += Tmp;
return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
case X86ISD::PCMPGT:
case X86ISD::PCMPEQ:
case X86ISD::CMPP:
case X86ISD::VPCOM:
case X86ISD::VPCOMU:
// Vector compares return zero/all-bits result values.
return VTBits;
}
// Fallback case.
return 1;
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
const GlobalValue* &GA,
int64_t &Offset) const {
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;
}
}
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue &V1, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
// Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
unsigned NumDstElts = NumMaskElts / Scale;
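// A zero-extension by Scale keeps element i at position i*Scale and fills
// the intervening positions with zero or undef.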
for (unsigned i = 0; i != NumDstElts && Match; ++i) {
Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (Match) {
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
if (SrcVT != MaskVT)
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
Shuffle = SrcVT != MaskVT ? unsigned(X86ISD::VZEXT)
: unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
return true;
}
}
}
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
// Check if we have SSE3, which will let us use MOVDDUP etc. These
// instructions are no slower than UNPCKLPD but have the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
}
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
}
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
}
// Attempt to match against broadcast-from-vector.
if (Subtarget.hasAVX2()) {
SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
SrcVT = DstVT = MaskVT;
Shuffle = X86ISD::VBROADCAST;
return true;
}
}
return false;
}
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned InputSizeInBits = MaskVT.getSizeInBits();
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros =
llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
// Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
}
} else if (AllowFloatDomain && Subtarget.hasAVX()) {
// VPERMILPD can permute with a non-repeating shuffle.
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
for (int i = 0, e = Mask.size(); i != e; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
PermuteImm |= (M & 1) << i;
}
return true;
}
}
// Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
// AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
!ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
scaleShuffleMask(2, RepeatedMask, WordMask);
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
}
}
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
ArrayRef<int> LoMask(Mask.data() + 0, 4);
ArrayRef<int> HiMask(Mask.data() + 4, 4);
// PSHUFLW: permute lower 4 elements only.
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
Shuffle = X86ISD::PSHUFLW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(LoMask);
return true;
}
// PSHUFHW: permute upper 4 elements only.
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
// Offset the HiMask so that we can create the shuffle immediate.
int OffsetHiMask[4];
for (int i = 0; i != 4; ++i)
OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
Shuffle = X86ISD::PSHUFHW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
return true;
}
}
}
// Attempt to match against byte/bit shifts.
// FIXME: Add 512-bit support.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
MaskScalarSizeInBits, Mask,
0, Zeroable, Subtarget);
if (0 < ShiftAmt) {
PermuteImm = (unsigned)ShiftAmt;
return true;
}
}
return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
ShuffleVT = MaskVT;
return true;
}
if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
ShuffleVT = MaskVT;
return true;
}
}
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
DAG, Subtarget)) {
ShuffleVT = MaskVT;
if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2())
ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
return true;
}
}
return false;
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
// Attempt to match against PALIGNR byte rotate.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
PermuteImm = ByteRotation;
return true;
}
}
// Attempt to combine to X86ISD::BLENDI.
if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
(Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
(MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
RepeatedMask)) {
assert(RepeatedMask.size() == 8 &&
"Repeated mask size doesn't match!");
PermuteImm = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
PermuteImm |= 1 << i;
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
Shuffle = X86ISD::BLENDI;
ShuffleVT = MaskVT;
return true;
}
} else {
// Determine a type compatible with X86ISD::BLENDI.
ShuffleVT = MaskVT;
if (Subtarget.hasAVX2()) {
if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v8i32;
else if (ShuffleVT == MVT::v2i64)
ShuffleVT = MVT::v4i32;
} else {
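// Pre-AVX2 targets have no 32/64-bit integer blends, so fall back to
// PBLENDW or the floating-point BLENDPD/BLENDPS forms.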
if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
ShuffleVT = MVT::v8i16;
else if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v4f64;
else if (ShuffleVT == MVT::v8i32)
ShuffleVT = MVT::v8f32;
}
if (!ShuffleVT.isFloatingPoint()) {
int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
BlendMask =
scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
}
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
return true;
}
}
}
// Attempt to combine to INSERTPS.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
return true;
}
}
// Attempt to combine to SHUFPD.
if (AllowFloatDomain && EltSizeInBits == 64 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
}
}
// Attempt to combine to SHUFPS.
if (AllowFloatDomain && EltSizeInBits == 32 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
SmallVector<int, 4> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
// Match each half of the repeated mask to determine whether it just
// references one of the vectors, is zeroable, or is entirely undef.
auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
int M0 = RepeatedMask[Offset];
int M1 = RepeatedMask[Offset + 1];
if (isUndefInRange(RepeatedMask, Offset, 2)) {
return DAG.getUNDEF(MaskVT);
} else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
S0 = (SM_SentinelUndef == M0 ? -1 : 0);
S1 = (SM_SentinelUndef == M1 ? -1 : 1);
return getZeroVector(MaskVT, Subtarget, DAG, DL);
} else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V1;
} else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V2;
}
return SDValue();
};
int ShufMask[4] = {-1, -1, -1, -1};
SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
if (Lo && Hi) {
V1 = Lo;
V2 = Hi;
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
PermuteImm = getV4X86ShuffleImm(ShufMask);
return true;
}
}
}
return false;
}
/// \brief Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
/// This is the leaf of the recursive combine below. When we have found some
/// chain of single-use x86 shuffle instructions and accumulated the combined
/// shuffle mask represented by them, this will try to pattern match that mask
/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
// Find the inputs that enter the chain. Note that multiple uses are OK
// here; we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
SDValue V1 = peekThroughBitcasts(Inputs[0]);
SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
: peekThroughBitcasts(Inputs[1]));
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
MVT RootVT = Root.getSimpleValueType();
assert(VT1.getSizeInBits() == RootVT.getSizeInBits() &&
VT2.getSizeInBits() == RootVT.getSizeInBits() &&
"Vector size mismatch");
SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
/*AddTo*/ true);
return true;
}
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are an AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
// from being reused.
// TODO - this currently prevents all lane shuffles from occurring.
// TODO - check for writemasks usage instead of always preventing combining.
// TODO - attempt to narrow Mask back to writemask size.
bool IsEVEXShuffle =
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
return false;
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
// Handle 128-bit lane shuffles of 256-bit vectors.
// TODO - this should support binary shuffles.
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
return false; // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
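// Each nibble of the VPERM2X128 immediate selects the source 128-bit lane
// for one destination lane; the value 0x8 zeroes that lane instead.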
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT),
DAG.getConstant(PermMask, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
SmallVector<int, 64> Mask;
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
scaleShuffleMask(MaskScale, BaseMask, Mask);
} else {
Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
}
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
// Determine the effective mask value type.
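// Only 32/64-bit elements have float-domain shuffles; narrower masks must
// stay in the integer domain.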
FloatDomain &= (32 <= MaskEltSizeInBits);
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
: MVT::getIntegerVT(MaskEltSizeInBits);
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
// Only allow legal mask types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
return false;
// Attempt to match the mask against known shuffle patterns.
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
bool AllowFloatDomain = FloatDomain || (Depth > 3);
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
APInt Zeroable(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (isUndefOrZero(Mask[i]))
Zeroable.setBit(i);
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
// (zero) elements within themselves.
if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
(V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
/*AddTo*/ true);
return true;
}
}
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
}
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Typically from here on, we need an integer version of MaskVT.
MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits);
IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts);
// Annoyingly, SSE4A instructions don't map into the above match helpers.
if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
return false; // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
return false; // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(IntMaskVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
return false;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
((Subtarget.hasAVX2() &&
(MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
// vector as the second source.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasVLX() &&
(MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
// Adjust shuffle mask - replace SM_SentinelZero with second source index.
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] == SM_SentinelZero)
Mask[i] = NumMaskElts + i;
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL);
DCI.AddToWorklist(Zero.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have a dual input lane-crossing shuffle then lower to VPERMV3.
if ((Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasVLX() &&
(MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
return false;
}
// See if we can combine a single input shuffle with zeros to a bit-mask,
// which is much simpler than any shuffle.
if (UnaryShuffle && MaskContainsZeros && (Depth >= 3 || HasVariableMask) &&
isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
}
if (M == SM_SentinelZero)
continue;
EltBits[i] = AllOnes;
}
SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
DCI.AddToWorklist(BitMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
unsigned AndOpcode =
FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes, use the variable mask form of VPERMILPS.
// TODO Combine other mask types at higher depths.
if (UnaryShuffle && HasVariableMask && !MaskContainsZeros &&
((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
(MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
SmallVector<SDValue, 16> VPermIdx;
for (int M : Mask) {
SDValue Idx =
M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32);
VPermIdx.push_back(Idx);
}
SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// With XOP, binary shuffles of 128/256-bit floating point vectors can combine
// to VPERMIL2PD/VPERMIL2PS.
if ((Depth >= 3 || HasVariableMask) && Subtarget.hasXOP() &&
(MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
MaskVT == MVT::v8f32)) {
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
unsigned NumLanes = MaskVT.getSizeInBits() / 128;
unsigned NumEltsPerLane = NumMaskElts / NumLanes;
SmallVector<int, 8> VPerm2Idx;
unsigned M2ZImm = 0;
for (int M : Mask) {
if (M == SM_SentinelUndef) {
VPerm2Idx.push_back(-1);
continue;
}
if (M == SM_SentinelZero) {
M2ZImm = 2;
VPerm2Idx.push_back(8);
continue;
}
int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
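// The PD selector lives in bits 2:1 (see the layout above), so 64-bit
// element indices are shifted left by one.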
Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
VPerm2Idx.push_back(Index);
}
V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPerm2MaskOp.getNode());
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getConstant(M2ZImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replaces 5
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
(RootVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<SDValue, 16> PSHUFBMask;
int NumBytes = RootVT.getSizeInBits() / 8;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8));
continue;
}
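// Widen the mask index from shuffle-element granularity to byte granularity.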
M = Ratio * M + i % Ratio;
assert ((M / 16) == (i / 16) && "Lane crossing detected");
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
Res = DAG.getBitcast(ByteVT, V1);
DCI.AddToWorklist(Res.getNode());
SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// With XOP, if we have a 128-bit binary input shuffle we can always combine
// to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
// slower than PSHUFB on targets that support both.
if ((Depth >= 3 || HasVariableMask) && RootVT.is128BitVector() &&
Subtarget.hasXOP()) {
// VPPERM Mask Operation
// Bits[4:0] - Byte Index (0 - 31)
// Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
SmallVector<SDValue, 16> VPPERMMask;
int NumBytes = 16;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::v16i8;
V1 = DAG.getBitcast(ByteVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ByteVT, V2);
DCI.AddToWorklist(V2.getNode());
SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
DCI.AddToWorklist(VPPERMMaskOp.getNode());
Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Failed to find any combines.
return false;
}
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
ArrayRef<int> Mask, SDValue Root,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MVT VT = Root.getSimpleValueType();
unsigned SizeInBits = VT.getSizeInBits();
unsigned NumMaskElts = Mask.size();
unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
unsigned NumOps = Ops.size();
// Extract constant bits from each source op.
bool OneUseConstantOp = false;
SmallVector<APInt, 16> UndefEltsOps(NumOps);
SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue SrcOp = Ops[i];
OneUseConstantOp |= SrcOp.hasOneUse();
if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
RawBitsOps[i]))
return false;
}
// Only fold if at least one of the constants is only used once or
// the combined shuffle has included a variable mask shuffle; this
// is to avoid constant pool bloat.
if (!OneUseConstantOp && !HasVariableMask)
return false;
// Shuffle the constant bits according to the mask.
APInt UndefElts(NumMaskElts, 0);
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
APInt::getNullValue(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
} else if (M == SM_SentinelZero) {
ZeroElts.setBit(i);
continue;
}
assert(0 <= M && M < (int)(NumMaskElts * NumOps));
unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
if (SrcUndefElts[SrcMaskIdx]) {
UndefElts.setBit(i);
continue;
}
auto &SrcEltBits = RawBitsOps[SrcOpIdx];
APInt &Bits = SrcEltBits[SrcMaskIdx];
if (!Bits) {
ZeroElts.setBit(i);
continue;
}
ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
// Create the constant data.
MVT MaskSVT;
if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
else
MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
SDLoc DL(Root);
SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
DCI.AddToWorklist(CstOp.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(VT, CstOp));
return true;
}
/// \brief Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
/// they have been fully optimized, this will recursively consider all chains
/// of single-use shuffle instructions, build a generic model of the cumulative
/// shuffle operation, and check for simpler instructions which implement this
/// operation. We use this primarily for two purposes:
///
/// 1) Collapse generic shuffles to specialized single instructions when
/// equivalent. In most cases, this is just an encoding size win, but
/// sometimes we will collapse multiple generic shuffles into a single
/// special-purpose shuffle.
/// 2) Look for sequences of shuffle instructions with 3 or more total
/// instructions, and replace them with the slightly more expensive SSSE3
/// PSHUFB instruction if available. We do this as the last combining step
/// to ensure we avoid using PSHUFB if we can implement the shuffle with
/// a suitable short sequence of other instructions. The PSHUFB will either
/// use a register or have to read from memory and so is slightly (but only
/// slightly) more expensive than the other shuffle instructions.
///
/// Because this is inherently a quadratic operation (for each shuffle in
/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
/// This should never be an issue in practice as the shuffle lowering doesn't
/// produce sequences of more than 8 instructions.
///
/// FIXME: We will currently miss some cases where the redundant shuffling
/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask,
ArrayRef<const SDNode*> SrcNodes,
int Depth, bool HasVariableMask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
if (Depth > 8)
return false;
// Directly rip through bitcasts to find the underlying operand.
SDValue Op = SrcOps[SrcOpIndex];
Op = peekThroughOneUseBitcasts(Op);
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; // Bail if we hit a non-vector.
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return false;
assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
int InputIdx0 = -1, InputIdx1 = -1;
for (int i = 0, e = Ops.size(); i < e; ++i) {
SDValue BC = peekThroughBitcasts(Ops[i]);
if (Input0 && BC == peekThroughBitcasts(Input0))
InputIdx0 = i;
if (Input1 && BC == peekThroughBitcasts(Input1))
InputIdx1 = i;
}
if (Input0 && InputIdx0 < 0) {
InputIdx0 = SrcOpIndex;
Ops[SrcOpIndex] = Input0;
}
if (Input1 && InputIdx1 < 0) {
InputIdx1 = Ops.size();
Ops.push_back(Input1);
}
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
(OpMask.size() > RootMask.size() &&
OpMask.size() % RootMask.size() == 0) ||
OpMask.size() == RootMask.size()) &&
"The smaller number of elements must divide the larger.");
// This function can be performance-critical, so we rely on the power-of-2
// knowledge that we have about the mask sizes to replace div/rem ops with
// bit-masks and shifts.
assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
assert((RootRatio == 1 || OpRatio == 1) &&
"Must not have a ratio for both incoming and op masks!");
assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
// root mask to get us all the way to the root value arrangement. The reason
// for this order is that we are recursing up the operation chain.
for (unsigned i = 0; i < MaskWidth; ++i) {
unsigned RootIdx = i >> RootRatioLog2;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane; we're done.
Mask[i] = RootMask[RootIdx];
continue;
}
unsigned RootMaskedIdx =
RootRatio == 1
? RootMask[RootIdx]
: (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
// Just insert the scaled root mask value if it references an input other
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
Mask[i] = RootMaskedIdx;
continue;
}
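// Strip the source-operand offset (a fast modulo by the power-of-2 mask
// width) to get an index local to this op's mask.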
RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef; it doesn't matter which ones we
// use.
Mask[i] = OpMask[OpIdx];
continue;
}
// Ok, we have non-zero lanes, map them through to one of the Op's inputs.
unsigned OpMaskedIdx =
OpRatio == 1
? OpMask[OpIdx]
: (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
if (OpMask[OpIdx] < (int)OpMask.size()) {
assert(0 <= InputIdx0 && "Unknown target shuffle input");
OpMaskedIdx += InputIdx0 * MaskWidth;
} else {
assert(0 <= InputIdx1 && "Unknown target shuffle input");
OpMaskedIdx += InputIdx1 * MaskWidth;
}
Mask[i] = OpMaskedIdx;
}
// Handle the all undef/zero cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) {
DCI.CombineTo(Root.getNode(), DAG.getUNDEF(Root.getValueType()));
return true;
}
if (all_of(Mask, [](int Idx) { return Idx < 0; })) {
// TODO - should we handle the mixed zero/undef case as well? Just returning
// a zero mask will lose information on undef elements, possibly reducing
// future combine possibilities.
DCI.CombineTo(Root.getNode(), getZeroVector(Root.getSimpleValueType(),
Subtarget, DAG, SDLoc(Root)));
return true;
}
// Remove unused shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
// Update the list of shuffle nodes that have been combined so far.
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
SrcNodes.end());
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
// shuffle). The source op should only be combined if it either has a
// single use (i.e. current Op) or all its users have already been combined.
for (int i = 0, e = Ops.size(); i < e; ++i)
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes,
Depth + 1, HasVariableMask, DAG, DCI,
Subtarget))
return true;
// Attempt to constant fold all of the constant source ops.
if (combineX86ShufflesConstants(Ops, Mask, Root, HasVariableMask, DAG, DCI,
Subtarget))
return true;
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() > 2)
return false;
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
SmallVector<int, 64> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
}
// Canonicalization of binary shuffle masks to improve pattern matching by
// commuting the inputs.
if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(Ops[0], Ops[1]);
}
return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
DCI, Subtarget);
}
/// \brief Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to ease forming
/// v4 PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
SmallVector<SDValue, 2> Ops;
bool IsUnary;
bool HaveMask =
getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
// If we have more than 128 bits, only the low 128 bits of the shuffle mask
// matter. Check that the upper masks are repeats and remove them.
if (VT.getSizeInBits() > 128) {
int LaneElts = 128 / VT.getScalarSizeInBits();
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
"Mask doesn't repeat in high 128-bit lanes!");
#endif
Mask.resize(LaneElts);
}
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
case X86ISD::PSHUFLW:
Mask.resize(4);
return Mask;
case X86ISD::PSHUFHW:
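// PSHUFHW only shuffles words 4-7, so drop the identity low half and
// rebase the remaining indices to 0-3 to get a v4-style mask.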
Mask.erase(Mask.begin(), Mask.begin() + 4);
for (int &M : Mask)
M -= 4;
return Mask;
default:
llvm_unreachable("No valid shuffle instruction found!");
}
}
/// \brief Search for a combinable shuffle across a chain ending in pshufd.
///
/// We walk up the chain and look for a combinable shuffle, skipping over
/// shuffles that we could hoist this shuffle's transformation past without
/// altering anything.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
// Walk up a single-use chain looking for a combinable shuffle. Keep a stack
// of the shuffles in the chain so that we can form a fresh chain to replace
// this one.
SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
// instructions.
continue;
case X86ISD::PSHUFD:
// Found another dword shuffle.
break;
case X86ISD::PSHUFLW:
// Check that the low words (being shuffled) are the identity in the
// dword shuffle, and the high words are self-contained.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
// Check that the high words (being shuffled) are the identity in the
// dword shuffle, and the low words are self-contained.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
V.getSimpleValueType().getVectorElementType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
return SDValue();
Chain.push_back(V);
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
Chain.push_back(V);
LLVM_FALLTHROUGH;
case ISD::BITCAST:
V = V.getOperand(0);
continue;
}
break;
} while (V.hasOneUse());
break;
}
// Break out of the loop if we break out of the switch.
break;
}
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
return SDValue();
// Merge this node's mask and our incoming mask.
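// This composes the two shuffles: element i of the result is element
// Mask[i] of V's output, which in turn is element VMask[Mask[i]] of V's
// input.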
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Rebuild the chain around this new shuffle.
while (!Chain.empty()) {
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
default:
llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
break;
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
break;
}
}
if (V.getValueType() != N.getValueType())
V = DAG.getBitcast(N.getValueType(), V);
// Return the new chain to replace N.
return V;
}
/// \brief Search for a combinable shuffle across a chain ending in pshuflw or
/// pshufhw.
///
/// We walk up the chain, skipping shuffles of the other half and looking
/// through shuffles which switch halves trying to find a shuffle of the same
/// pair of dwords.
static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert(
(N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
unsigned CombineOpcode = N.getOpcode();
// Walk up a single-use chain looking for a combinable shuffle.
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
return false; // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
// instructions.
continue;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOpcode)
break;
// Other-half shuffles are no-ops.
continue;
}
// Break out of the loop if we break out of the switch.
break;
}
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
return false;
// Combine away the bottom node as its shuffle will be accumulated into
// a preceding shuffle.
DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
// Record the old value.
SDValue Old = V;
// Merge this node's mask and our incoming mask (adjusted to account for all
// the pshufd instructions encountered).
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Check that the shuffles didn't cancel each other out. If not, we need to
// combine to the new one.
if (Old != V)
// Replace the combinable shuffle with the combined one, updating all users
// so that we re-evaluate the chain here.
DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
return true;
}
/// \brief Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
case X86ISD::UNPCKL: {
auto Op0 = N.getOperand(0);
auto Op1 = N.getOperand(1);
unsigned Opcode0 = Op0.getOpcode();
unsigned Opcode1 = Op1.getOpcode();
// Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single
// X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization.
// TODO: Add other horizontal operations as required.
if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD)
return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0));
// Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
// which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
// moves the upper half elements into the lower half. For example:
//
// t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
// undef:v16i8
// t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
//
// will be combined to:
//
// t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
// This is only done for 128-bit vectors. From SSE4.1 onward this combine may
// not trigger because more capable shuffle instructions are available.
if (!VT.is128BitVector())
return SDValue();
if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) {
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<int, 8> ExpectedMask(NumElts, -1);
std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
NumElts / 2);
auto ShufOp = Op1.getOperand(0);
if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
}
return SDValue();
}
case X86ISD::BLENDI: {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
"Unexpected input vector types");
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
SDValue V0 = peekThroughBitcasts(N->getOperand(0));
SDValue V1 = peekThroughBitcasts(N->getOperand(1));
bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
if (isZero0 && isZero1)
return SDValue();
// We often lower to MOVSD/MOVSS from integer as well as native float
// types; remove unnecessary domain-crossing bitcasts if we can to make it
// easier to combine shuffles later on. We've already accounted for the
// domain switching cost when we decided to lower with it.
bool isFloat = VT.isFloatingPoint();
bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) {
MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32)
: (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32);
V0 = DAG.getBitcast(NewVT, V0);
V1 = DAG.getBitcast(NewVT, V1);
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, NewVT, V0, V1));
}
return SDValue();
}
case X86ISD::INSERTPS: {
assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
SDValue Op2 = N.getOperand(2);
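// The INSERTPS immediate encodes the source element index in bits [7:6],
// the destination element index in bits [5:4] and a zeroing mask in
// bits [3:0].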
unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
unsigned ZeroMask = InsertPSMask & 0xF;
// If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1;
SmallVector<SDValue, 2> Ops1;
if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) {
int M = TargetMask1[SrcIdx];
if (isUndefOrZero(M)) {
// Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
// Update insertps mask srcidx and reference the source input directly.
assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
// Attempt to merge insertps Op0 with an inner target shuffle node.
SmallVector<int, 8> TargetMask0;
SmallVector<SDValue, 2> Ops0;
if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0))
return SDValue();
bool Updated = false;
bool UseInput00 = false;
bool UseInput01 = false;
for (int i = 0; i != 4; ++i) {
int M = TargetMask0[i];
if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
// No change if element is already zero or the inserted element.
continue;
} else if (isUndefOrZero(M)) {
// If the target mask is undef/zero then we must zero the element.
InsertPSMask |= (1u << i);
Updated = true;
continue;
}
// The input vector element must be inline.
if (M != i && M != (i + 4))
return SDValue();
// Determine which inputs of the target shuffle we're using.
UseInput00 |= (0 <= M && M < 4);
UseInput01 |= (4 <= M);
}
// If we're not using both inputs of the target shuffle then use the
// referenced input directly.
if (UseInput00 && !UseInput01) {
Updated = true;
Op0 = Ops0[0];
} else if (!UseInput00 && UseInput01) {
Updated = true;
Op0 = Ops0[1];
}
if (Updated)
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
return SDValue();
}
default:
return SDValue();
}
// Nuke no-op shuffles that show up after combining.
if (isNoopShuffleMask(Mask))
return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
// Look for simplifications involving one or two shuffle instructions.
SDValue V = N.getOperand(0);
switch (N.getOpcode()) {
default:
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
V = DAG.getBitcast(DVT, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
DCI.AddToWorklist(V.getNode());
return DAG.getBitcast(VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
// FIXME: This doesn't handle the location of the PSHUFD generically, and
// only works when we have a PSHUFD followed by two half-shuffles.
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int WordMask[8];
for (int i = 0; i < 4; ++i) {
WordMask[i + NOffset] = Mask[i] + NOffset;
WordMask[i + VOffset] = VMask[i] + VOffset;
}
// Map the word mask through the DWord mask.
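// A word index W refers to the dword shuffle's output; dword W / 2 came
// from input dword DMask[W / 2], so word W came from input word
// 2 * DMask[W / 2] + (W % 2).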
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
V = DAG.getBitcast(VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, VT, V, V);
}
}
}
break;
case X86ISD::PSHUFD:
if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
return NewN;
break;
}
return SDValue();
}
/// Returns true iff the shuffle node \p N can be replaced with an ADDSUB
/// operation. If true is returned, the operands of the ADDSUB operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
/// We combine shuffles to ADDSUB directly on the abstract vector shuffle nodes
/// so they are easier to match generically. We also insert dummy vector shuffle
/// nodes for the operands which explicitly discard the lanes which are unused
/// by this operation, so that the fact that they're unused flows through the
/// rest of the combiner.
static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
SDValue &Opnd0, SDValue &Opnd1) {
EVT VT = N->getValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
// extraction tool to support more.
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
// We require the first shuffle operand to be the FSUB node, and the second to
// be the FADD node.
if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
return false;
// If there are other uses of these operations we can't fold them.
if (!V1->hasOneUse() || !V2->hasOneUse())
return false;
// Ensure that both operations have the same operands. Note that we can
// commute the FADD operands.
SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
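// The expected masks take even lanes from the FSUB node and odd lanes from
// the FADD node, matching the lane behaviour of ADDSUBPS/ADDSUBPD.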
if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
8, 25, 10, 27, 12, 29, 14, 31})))
return false;
Opnd0 = LHS;
Opnd1 = RHS;
return true;
}
/// \brief Try to combine a shuffle into a target-specific add-sub or
/// mul-add-sub node.
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.
static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
return SDValue();
EVT VT = N->getValueType(0);
// We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
if (VT.getVectorElementType() != MVT::i32 &&
VT.getVectorElementType() != MVT::i64 &&
VT.getVectorElementType() != MVT::f32 &&
VT.getVectorElementType() != MVT::f64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Check that both sources are concats with undef.
if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
!N1.getOperand(1).isUndef())
return SDValue();
// Construct the new shuffle mask. Elements from the first source retain their
// index, but elements from the second source no longer need to skip an undef.
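// e.g. with v4 output elements, index 4 (element 0 of the second concat)
// maps to index 2 of the combined concat built below.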
SmallVector<int, 8> Mask;
int NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (int Elt : SVOp->getMask())
Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
SDLoc DL(N);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
N1.getOperand(0));
return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
}
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
// During Type Legalization, when promoting illegal vector types,
// the backend might introduce new shuffle dag nodes and bitcasts.
//
// This code performs the following transformation:
// fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
// (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
//
// We do this only if both the bitcast and the BINOP dag nodes have
// one use. Also, perform this transformation only if the new binary
// operation is legal. This is to avoid introducing dag nodes that
// potentially need to be further expanded (or custom lowered) into a
// less optimal sequence of dag nodes.
if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE &&
N->getOperand(0).getOpcode() == ISD::BITCAST &&
N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue BC0 = N0.getOperand(0);
EVT SVT = BC0.getValueType();
unsigned Opcode = BC0.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
if (BC0.hasOneUse() && SVT.isVector() &&
SVT.getVectorNumElements() * 2 == NumElts &&
TLI.isOperationLegal(Opcode, VT)) {
bool CanFold = false;
switch (Opcode) {
default : break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
// isOperationLegal lies for integer ops on floating point types.
CanFold = VT.isInteger();
break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
// isOperationLegal lies for floating point ops on integer types.
CanFold = VT.isFloatingPoint();
break;
}
unsigned SVTNumElts = SVT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) < 0;
if (CanFold) {
SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask());
}
}
}
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
Elts.push_back(Elt);
continue;
}
Elts.clear();
break;
}
if (Elts.size() == VT.getVectorNumElements())
if (SDValue LD =
EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
// (concat_vectors t2, undef))
// Into:
// (vector_shuffle <mask> (concat_vectors t1, t2), undef)
// Since the latter can be efficiently lowered with VPERMD/VPERMQ
if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
return ShufConcat;
if (isTargetShuffle(N->getOpcode())) {
SDValue Op(N, 0);
if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
return Shuffle;
// Try recursively combining arbitrary sequences of x86 shuffle
// instructions into higher-order shuffles. We do this after combining
// specific PSHUF instruction sequences into their minimal form so that we
// can evaluate how many specialized shuffle instructions are involved in
// a particular chain.
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
}
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
EVT EltVT = N->getValueType(0);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
EVT OriginalVT = InVec.getValueType();
// Peek through bitcasts, don't duplicate a load with other uses.
InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
if (!CurrentVT.isVector() ||
CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 2> ShuffleOps;
bool UnaryShuffle;
if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
ShuffleOps, ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
if (Idx == SM_SentinelZero)
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
: DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
if (Idx == SM_SentinelUndef)
return DAG.getUNDEF(EltVT);
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0]
: ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
(ShuffleOps.size() > 1 && ShuffleOps[0] == ShuffleOps[1]) ? 2 : 1;
if (LdNode.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
return SDValue();
AllowedUses = 1; // only allow 1 load use if we have a bitcast
LdNode = LdNode.getOperand(0);
}
if (!ISD::isNormalLoad(LdNode.getNode()))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
EltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
return SDValue();
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
SDLoc dl(N);
// Create shuffle node taking into account the case that it's a unary shuffle.
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
ShuffleMask);
Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the illegal vector is scalarized on subtargets that don't have legal
// vxi1 types.
static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
const X86Subtarget &Subtarget) {
EVT VT = BitCast.getValueType();
SDValue N0 = BitCast.getOperand(0);
EVT VecVT = N0->getValueType(0);
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
// v8f64. So all legal 128-bit and 256-bit vectors are covered except for
// v8i16 and v16i16.
// For these two cases, we can shuffle the upper element bytes to a
// consecutive sequence at the start of the vector and treat the results as
// v16i8 or v32i8, and for v16i8 this is the preferable solution. However,
// for v16i16 this is not the case: the shuffle is expensive, so we
// avoid sign-extending to this type entirely.
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
switch (VecVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
FPCastVT = MVT::v2f64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
FPCastVT = MVT::v4f32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC &&
N0->getOperand(0)->getValueType(0).is256BitVector() &&
Subtarget.hasInt256()) {
SExtVT = MVT::v4i64;
FPCastVT = MVT::v4f64;
}
break;
case MVT::v8i1:
SExtVT = MVT::v8i16;
// For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
// sign-extend to a 256-bit operation to match the compare.
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
if (N0->getOpcode() == ISD::SETCC &&
N0->getOperand(0)->getValueType(0).is256BitVector() &&
Subtarget.hasInt256()) {
SExtVT = MVT::v8i32;
FPCastVT = MVT::v8f32;
}
break;
case MVT::v16i1:
SExtVT = MVT::v16i8;
// For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
// it is not profitable to sign-extend to 256-bit because this will
// require an extra cross-lane shuffle which is more expensive than
// truncating the result of the compare to 128-bits.
break;
case MVT::v32i1:
// TODO: Handle pre-AVX2 cases by splitting to two v16i1's.
if (!Subtarget.hasInt256())
return SDValue();
SExtVT = MVT::v32i8;
break;
}
SDLoc DL(BitCast);
SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
if (SExtVT == MVT::v8i16) {
V = DAG.getBitcast(MVT::v16i8, V);
V = DAG.getVectorShuffle(
MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
{0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
} else
assert(SExtVT.getScalarType() != MVT::i16 &&
"Vectors of i16 must be shuffled");
if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
V = DAG.getBitcast(FPCastVT, V);
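// MOVMSK gathers the sign bit of each vector element into the low bits of
// a GPR; since V is a sign-extended mask, this recovers the original vXi1
// value as an integer.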
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getZExtOrTrunc(V, DL, VT);
}
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize())
if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
return V;
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
// Detect bitcasts between i32 to x86mmx low word.
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType() == MVT::i32)
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
}
// Detect bitcasts between element or subvector extraction to x86mmx.
if (VT == MVT::x86mmx &&
(N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType().is128BitVector())
return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
DAG.getBitcast(MVT::v2i64, N00));
}
// Detect bitcasts from FP_TO_SINT to x86mmx.
if (VT == MVT::x86mmx && SrcVT == MVT::v2i32 &&
N0.getOpcode() == ISD::FP_TO_SINT) {
SDLoc DL(N0);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
DAG.getBitcast(MVT::v2i64, Res));
}
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
// constant in an integer register and transferring it to an SSE register or
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64)))
return SDValue();
SDValue LogicOp0 = N0.getOperand(0);
SDValue LogicOp1 = N0.getOperand(1);
SDLoc DL0(N0);
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
}
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
// Match a binop + shuffle pyramid that represents a horizontal reduction over
// the elements of a vector.
// Returns the vector that is being reduced on, or SDValue() if a reduction
// was not matched.
static SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType BinOp) {
// The pattern must end in an extract from index 0.
if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
!isNullConstant(Extract->getOperand(1)))
return SDValue();
unsigned Stages =
Log2_32(Extract->getOperand(0).getValueType().getVectorNumElements());
SDValue Op = Extract->getOperand(0);
// At each stage, we're looking for something that looks like:
// %s = shufflevector <8 x i32> %op, <8 x i32> undef,
// <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
// i32 undef, i32 undef, i32 undef, i32 undef>
// %a = binop <8 x i32> %op, %s
// Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
// we expect something like:
// <4,5,6,7,u,u,u,u>
// <2,3,u,u,u,u,u,u>
// <1,u,u,u,u,u,u,u>
for (unsigned i = 0; i < Stages; ++i) {
if (Op.getOpcode() != BinOp)
return SDValue();
ShuffleVectorSDNode *Shuffle =
dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
if (Shuffle) {
Op = Op.getOperand(1);
} else {
Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
Op = Op.getOperand(0);
}
// The first operand of the shuffle should be the same as the other operand
// of the add.
if (!Shuffle || (Shuffle->getOperand(0) != Op))
return SDValue();
// Verify the shuffle has the expected (at this stage of the pyramid) mask.
for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
return SDValue();
}
return Op;
}
// Given a select, detect the following pattern:
// 1: %2 = zext <N x i8> %0 to <N x i32>
// 2: %3 = zext <N x i8> %1 to <N x i32>
// 3: %4 = sub nsw <N x i32> %2, %3
// 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
// 5: %6 = sub nsw <N x i32> zeroinitializer, %4
// 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
// This is useful as it is the input into a SAD pattern.
static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
SDValue &Op1) {
// Check the condition of the select instruction is greater-than.
SDValue SetCC = Select->getOperand(0);
if (SetCC.getOpcode() != ISD::SETCC)
return false;
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
if (CC != ISD::SETGT && CC != ISD::SETLT)
return false;
SDValue SelectOp1 = Select->getOperand(1);
SDValue SelectOp2 = Select->getOperand(2);
// The following instructions assume SelectOp1 is the subtraction operand
// and SelectOp2 is the negation operand.
// In the case of SETLT this is the other way around.
if (CC == ISD::SETLT)
std::swap(SelectOp1, SelectOp2);
// The second operand of the select should be the negation of the first
// operand, which is implemented as 0 - SelectOp1.
if (!(SelectOp2.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
SelectOp2.getOperand(1) == SelectOp1))
return false;
// The first operand of SetCC is the first operand of the select, which is the
// difference between the two input vectors.
if (SetCC.getOperand(0) != SelectOp1)
return false;
// In the SETLT case, the second operand of the comparison can be either 1 or 0.
APInt SplatVal;
if ((CC == ISD::SETLT) &&
- !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
- SplatVal == 1) ||
+ !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal,
+ /*AllowShrink*/false) &&
+ SplatVal.isOneValue()) ||
(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
return false;
// In the SETGT case, the second operand of the comparison can be either -1 or 0.
if ((CC == ISD::SETGT) &&
!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
return false;
// The first operand of the select is the difference between the two input
// vectors.
if (SelectOp1.getOpcode() != ISD::SUB)
return false;
Op0 = SelectOp1.getOperand(0);
Op1 = SelectOp1.getOperand(1);
// Check if the operands of the sub are zero-extended from vectors of i8.
if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
Op1.getOpcode() != ISD::ZERO_EXTEND ||
Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
return false;
return true;
}
// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
const SDValue &Zext1, const SDLoc &DL) {
// Find the appropriate width for the PSADBW.
EVT InVT = Zext0.getOperand(0).getValueType();
unsigned RegSize = std::max(128u, InVT.getSizeInBits());
// "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
// fill in the missing vector elements with 0.
unsigned NumConcat = RegSize / InVT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
Ops[0] = Zext0.getOperand(0);
MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
Ops[0] = Zext1.getOperand(0);
SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
// Actually build the SAD
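// PSADBW sums the absolute differences of each group of eight bytes and
// zero-extends each 16-bit sum into the corresponding i64 lane.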
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
static SDValue combineHorizontalPredicateResult(SDNode *Extract,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE2 or with AVX512VL (which uses predicate registers).
if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
unsigned BitWidth = ExtractVT.getSizeInBits();
if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
ExtractVT != MVT::i8)
return SDValue();
// Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
for (ISD::NodeType Op : {ISD::OR, ISD::AND}) {
SDValue Match = matchBinOpReduction(Extract, Op);
if (!Match)
continue;
// EXTRACT_VECTOR_ELT can require implicit extension of the vector element
// which we can't support here for now.
if (Match.getScalarValueSizeInBits() != BitWidth)
continue;
// We require AVX2 for PMOVMSKB for v16i16/v32i8.
unsigned MatchSizeInBits = Match.getValueSizeInBits();
if (!(MatchSizeInBits == 128 ||
(MatchSizeInBits == 256 &&
((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
return SDValue();
// Don't bother performing this for 2-element vectors.
if (Match.getValueType().getVectorNumElements() <= 2)
return SDValue();
// Check that we are extracting a reduction of all sign bits.
if (DAG.ComputeNumSignBits(Match) != BitWidth)
return SDValue();
// For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
MVT MaskVT;
if (64 == BitWidth || 32 == BitWidth)
MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
MatchSizeInBits / BitWidth);
else
MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
APInt CompareBits;
ISD::CondCode CondCode;
if (Op == ISD::OR) {
// any_of -> MOVMSK != 0
CompareBits = APInt::getNullValue(32);
CondCode = ISD::CondCode::SETNE;
} else {
// all_of -> MOVMSK == ((1 << NumElts) - 1)
CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
CondCode = ISD::CondCode::SETEQ;
}
// Perform the select as i32/i64 and then truncate to avoid partial register
// stalls.
unsigned ResWidth = std::max(BitWidth, 32u);
EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
SDLoc DL(Extract);
SDValue Zero = DAG.getConstant(0, DL, ResVT);
SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
SDValue Res = DAG.getBitcast(MaskVT, Match);
Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
Ones, Zero, CondCode);
return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
}
return SDValue();
}
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
if (!Subtarget.hasSSE2())
return SDValue();
// Verify the type we're extracting from is any integer type above i16.
EVT VT = Extract->getOperand(0).getValueType();
if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
return SDValue();
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
// We handle up to v16i* for SSE2 / v32i* for AVX2 / v64i* for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (RegSize / VT.getVectorNumElements() < 8)
return SDValue();
// Match shuffle + add pyramid.
SDValue Root = matchBinOpReduction(Extract, ISD::ADD);
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
// In order to convert to i64 and above, additional any/zero/sign
// extend is expected.
// The zero extend from 32 bits has no mathematical effect on the result.
// Also, a sign extend is effectively a zero extend here
// (it extends the sign bit, which is zero).
// So it is correct to skip the sign/zero extend instruction.
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
Root.getOpcode() == ISD::ZERO_EXTEND ||
Root.getOpcode() == ISD::ANY_EXTEND))
Root = Root.getOperand(0);
// If there was a match, we want Root to be a select that is the root of an
// abs-diff pattern.
if (!Root || (Root.getOpcode() != ISD::VSELECT))
return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
SDValue Zext0, Zext1;
if (!detectZextAbsDiff(Root, Zext0, Zext1))
return SDValue();
// Create the SAD instruction.
SDLoc DL(Extract);
SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
// If the original vector was wider than 8 elements, sum over the results
// in the SAD vector.
unsigned Stages = Log2_32(VT.getVectorNumElements());
MVT SadVT = SAD.getSimpleValueType();
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
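// Each i64 lane of the PSADBW result holds the SAD of one group of eight
// bytes, so halve the number of live lanes with a shuffle+add at each step
// until lane 0 holds the full sum.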
for (unsigned i = Stages - 3; i > 0; --i) {
SmallVector<int, 16> Mask(SadElems, -1);
for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
}
}
MVT Type = Extract->getSimpleValueType(0);
unsigned TypeSizeInBits = Type.getSizeInBits();
// Return the lowest TypeSizeInBits bits.
MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
Extract->getOperand(1));
}
// Attempt to peek through a target shuffle and extract the scalar from the
// source.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Don't attempt this for boolean mask vectors or unknown extraction indices.
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
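// e.g. a 4-element mask over an 8-element source is scaled by 2, so each
// mask entry expands to two entries covering adjacent source elements.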
if (Mask.size() != NumSrcElts) {
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
scaleShuffleMask(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
Mask = std::move(WidenedMask);
// TODO - investigate support for wider shuffle masks with known upper
// undef/zero elements for implicit zero-extension.
}
}
// Check if narrowing/widening failed.
if (Mask.size() != NumSrcElts)
return SDValue();
int SrcIdx = Mask[N->getConstantOperandVal(1)];
SDLoc dl(N);
// If the shuffle source element is undef/zero then we can just accept it.
if (SrcIdx == SM_SentinelUndef)
return DAG.getUNDEF(VT);
if (SrcIdx == SM_SentinelZero)
return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
: DAG.getConstant(0, dl, VT);
SDValue SrcOp = Ops[SrcIdx / Mask.size()];
SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SrcIdx = SrcIdx % Mask.size();
// We can only extract other elements from 128-bit vectors and in certain
// circumstances, depending on SSE-level.
// TODO: Investigate using extract_subvector for larger vectors.
// TODO: Investigate float/double extraction if it will be just stored.
if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
assert(SrcSVT == VT && "Unexpected extraction type");
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
}
if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
(SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) {
assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
"Unexpected extraction type");
unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp,
DAG.getValueType(SrcSVT));
return DAG.getZExtOrTrunc(Assert, dl, VT);
}
return SDValue();
}
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
/// scalars back, while for x64 we should use 64-bit extracts and shifts.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
return NewOp;
SDValue InputVector = N->getOperand(0);
SDValue EltIdx = N->getOperand(1);
EVT SrcVT = InputVector.getValueType();
EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
// Detect mmx extraction of all bits as an i64. It works better as a bitcast.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getBitcast(VT, InputVector);
}
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
isa<ConstantSDNode>(EltIdx) &&
isa<ConstantSDNode>(InputVector.getOperand(0))) {
uint64_t ExtractedElt = N->getConstantOperandVal(1);
uint64_t InputValue = InputVector.getConstantOperandVal(0);
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
// Check whether this extract is the root of a sum of absolute differences
// pattern. This has to be done here because we really want it to happen
// pre-legalization.
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (SrcVT != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
// single use which is a sign-extend or zero-extend, and all elements are
// used.
SmallVector<SDNode *, 4> Uses;
unsigned ExtractedElements = 0;
for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
if (UI.getUse().getResNo() != InputVector.getResNo())
return SDValue();
SDNode *Extract = *UI;
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
if (Extract->getValueType(0) != MVT::i32)
return SDValue();
if (!Extract->hasOneUse())
return SDValue();
if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
if (!isa<ConstantSDNode>(Extract->getOperand(1)))
return SDValue();
// Record which element was extracted.
ExtractedElements |= 1 << Extract->getConstantOperandVal(1);
Uses.push_back(Extract);
}
// If not all the elements were used, this may not be worthwhile.
if (ExtractedElements != 15)
return SDValue();
// Ok, we've now decided to do the transformation.
// If 64-bit shifts are legal, use the extract-shift sequence,
// otherwise bounce the vector off the cache.
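// With legal 64-bit shifts, bitcast the v4i32 to v2i64: each i64 half
// yields two i32 elements, the low one by truncation and the high one by
// an arithmetic shift right by 32 followed by truncation.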
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vals[4];
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
auto &DL = DAG.getDataLayout();
EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL);
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(1, dl, VecIdxTy));
SDValue ShAmt = DAG.getConstant(
32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL));
Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf);
Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
} else {
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(SrcVT);
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo());
EVT ElementType = SrcVT.getVectorElementType();
unsigned EltSize = ElementType.getSizeInBits() / 8;
// Replace each use (extract) with a load of the appropriate element.
for (unsigned i = 0; i < 4; ++i) {
uint64_t Offset = EltSize * i;
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT);
SDValue ScalarAddr =
DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal);
// Load the scalar.
Vals[i] =
DAG.getLoad(ElementType, dl, Ch, ScalarAddr, MachinePointerInfo());
}
}
// Replace the extracts
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
uint64_t IdxVal = Extract->getConstantOperandVal(1);
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
// The replacement was made in place; don't return anything.
return SDValue();
}
// TODO - merge with combineExtractVectorElt once it can handle the implicit
// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in:
// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
// combineBasicSADPattern.
static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
return combineExtractWithShuffle(N, DAG, DCI, Subtarget);
}
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
assert(CondVT.isVector() && "Vector select expects a vector selector!");
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
// Check if the first operand is all zeros and Cond type is vXi1.
// This situation only applies to AVX512.
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
DAG.getAllOnesConstant(DL, CondVT));
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. I.e., the condition operand must
// have already been promoted from the IR select condition type <N x i1>.
// Don't check if the types themselves are equal because that excludes
// vector floating-point selects.
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// Try to invert the condition if true value is not all 1s and false value is
// not all 0s.
if (!TValIsAllOnes && !FValIsAllZeros &&
// Check if the selector will be produced by CMPP*/PCMP*.
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted.
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
ISD::CondCode NewCC =
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
Cond.getOperand(0).getValueType().isInteger());
Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
}
}
// vselect Cond, 111..., 000... -> Cond
if (TValIsAllOnes && FValIsAllZeros)
return DAG.getBitcast(VT, Cond);
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
return SDValue();
// vselect Cond, 111..., X -> or Cond, X
if (TValIsAllOnes) {
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, Or);
}
// vselect Cond, X, 000... -> and Cond, X
if (FValIsAllZeros) {
SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
return DAG.getBitcast(VT, And);
}
return SDValue();
}
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
SDLoc DL(N);
auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
if (!TrueC || !FalseC)
return SDValue();
// Don't do this for crazy integer types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType()))
return SDValue();
// If the condition is efficiently invertible, canonicalize the TrueC/FalseC
// values so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
(Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
isa<ConstantSDNode>(Cond.getOperand(1))))) {
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, DL, MVT::i8));
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32)
Diff = (unsigned)Diff;
bool IsFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default:
break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
IsFastMultiplier = true;
break;
}
}
if (IsFastMultiplier) {
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
return SDValue();
}
// If this is a bitcasted op that can be represented as another type, push
// the bitcast to the inputs. This allows more opportunities for pattern
// matching masked instructions. This is called when we know that the operation
// is used as one of the inputs of a vselect.
static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// Make sure we have a bitcast.
if (OrigOp.getOpcode() != ISD::BITCAST)
return false;
SDValue Op = OrigOp.getOperand(0);
// If the operation is used by anything other than the bitcast, we shouldn't
// do this combine as that would replicate the operation.
if (!Op.hasOneUse())
return false;
MVT VT = OrigOp.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc DL(Op.getNode());
auto BitcastAndCombineShuffle = [&](unsigned Opcode, SDValue Op0, SDValue Op1,
SDValue Op2) {
Op0 = DAG.getBitcast(VT, Op0);
DCI.AddToWorklist(Op0.getNode());
Op1 = DAG.getBitcast(VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1, Op2));
return true;
};
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::PALIGNR:
// PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
if (!VT.is128BitVector())
return false;
Opcode = X86ISD::VALIGN;
LLVM_FALLTHROUGH;
case X86ISD::VALIGN: {
if (EltVT != MVT::i32 && EltVT != MVT::i64)
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
// Make sure we can represent the same shift with the new VT.
if ((ShiftAmt % EltSize) != 0)
return false;
Imm = ShiftAmt / EltSize;
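// e.g. a v16i8 PALIGNR by 8 bytes rebased onto a v4i32 VALIGND: ShiftAmt is
// 8 * 8 == 64 bits, so the element-granular immediate becomes 64 / 32 == 2.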
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
DAG.getConstant(Imm, DL, MVT::i8));
}
case X86ISD::SHUF128: {
if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
return false;
// Only change element size, not type.
if (VT.isInteger() != Op.getSimpleValueType().isInteger())
return false;
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
case ISD::INSERT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
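// e.g. an insertion at element 8 of a v16i16 view becomes an insertion at
// element (8 * 16) / 32 == 4 in the v8i32 view.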
SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
// Op1 needs to be bitcasted to a smaller vector with the same element type.
SDValue Op1 = Op.getOperand(1);
MVT Op1VT = MVT::getVectorVT(EltVT,
Op1.getSimpleValueType().getSizeInBits() / EltSize);
Op1 = DAG.getBitcast(Op1VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
// Op0 needs to be bitcasted to a larger vector with the same element type.
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op0);
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0,
DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case X86ISD::SUBV_BROADCAST: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
// Only change element size, not type.
if (VT.isInteger() != Op.getSimpleValueType().isInteger())
return false;
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0));
return true;
}
}
return false;
}
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && VT != MVT::f128 &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget.hasSSE2() ||
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
}
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use an AVX instruction.
// The same applies to all 128- and 256-bit vectors of i8 and i16.
// Starting with SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
(VT.is128BitVector() || VT.is256BitVector()) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) &&
!(Subtarget.hasBWI() && Subtarget.hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
}
if (SDValue V = combineSelectOfTwoConstants(N, DAG))
return V;
// Canonicalize max and min:
// (x > y) ? x : y -> (x >= y) ? x : y
// (x < y) ? x : y -> (x <= y) ? x : y
// This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
// the need for an extra compare against zero. e.g.
// (x - y) > 0 ? (x - y) : 0 -> (x - y) >= 0 ? (x - y) : 0
// subl %esi, %edi
// testl %edi, %edi
// movl $0, %eax
// cmovgl %edi, %eax
// =>
// xorl %eax, %eax
// subl %esi, %edi
// cmovsl %eax, %edi
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGT: {
ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
Cond.getOperand(0), Cond.getOperand(1), NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
}
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
// left side invert the predicate to simplify logic below.
SDValue Other;
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
Other = RHS;
CC = ISD::getSetCCInverse(CC, true);
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
Other = LHS;
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x + (-C) : 0 --> subus x, C
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
CondRHSConst->getAPIntValue() ==
(-OpRHSConst->getAPIntValue() - 1))
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT));
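// e.g. with C == 32: (x >u 31 ? x + (-32) : 0) becomes (subus x, 32),
// since CondRHSConst == -(-32) - 1 == 31.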
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask())
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));
}
}
}
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
// condition so that blends can use the high (sign) bit of each element and
// use SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getScalarValueSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
return SDValue();
// We can only handle the cases where VSELECT is directly legal on the
// subtarget. We custom lower VSELECT nodes with constant conditions and
// this makes it hard to see whether a dynamic VSELECT will correctly
// lower, so we both check the operation's status and explicitly handle the
// cases where a *dynamic* blend will fail even though a constant-condition
// blend could be custom lowered.
// FIXME: We should find a better way to handle this class of problems.
// Potentially, we should combine constant-condition vselect nodes
// pre-legalization into shuffles and not mark as many types as custom
// lowered.
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
// FIXME: We don't support i16-element blends currently. We could and
// should support them by making *all* the bits in the condition be set
// rather than just the high bit and using an i8-element blend.
if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
if (VT.is128BitVector() && !Subtarget.hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
return SDValue();
+ // There are no 512-bit blend instructions that use sign bits.
+ if (VT.is512BitVector())
+ return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
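// Variable blend instructions test only the sign bit of each condition
// element, so demand just that bit and let SimplifyDemandedBits strip any
// computation feeding the remaining bits.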
APInt DemandedMask(APInt::getSignMask(BitWidth));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
// If we changed the computation somewhere in the DAG, this change will
// affect all users of Cond. Make sure it is fine and update all the nodes
// so that we do not use the generic VSELECT anymore. Otherwise, we may
// perform wrong optimizations as we messed with the actual expectation
// for the vector boolean values.
if (Cond != TLO.Old) {
// Check all uses of the condition operand to check whether it will be
// consumed by non-BLEND instructions. Those may require that all bits
// are set properly.
for (SDNode *U : Cond->uses()) {
// TODO: Add other opcodes eventually lowered into BLEND.
if (U->getOpcode() != ISD::VSELECT)
return SDValue();
}
// Update all users of the condition before committing the change, so
// that the VSELECT optimizations that expect the correct vector boolean
// value will not be triggered.
for (SDNode *U : Cond->uses()) {
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
U->getValueType(0), Cond, U->getOperand(1),
U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue();
}
// Only Cond (rather than other nodes in the computation chain) was
// changed. Change the condition just for N to keep the opportunity to
// optimize all other users their own way.
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
return SDValue();
}
}
// Look for vselects with LHS/RHS being bitcasted from an operation that
// can be executed on another type. Push the bitcast to the inputs of
// the operation. This exposes opportunities for using masking instructions.
if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() &&
CondVT.getVectorElementType() == MVT::i1) {
if (combineBitcastForMaskedOp(LHS, DAG, DCI))
return SDValue(N, 0);
if (combineBitcastForMaskedOp(RHS, DAG, DCI))
return SDValue(N, 0);
}
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
RHS = DAG.getBitcast(MVT::i64, RHS);
SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
return DAG.getBitcast(VT, newSelect);
}
return SDValue();
}
/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Can't replace the cmp if it has more uses than the one we're looking at.
// FIXME: We would like to be able to handle this, but would need to make sure
// all uses were updated.
if (!Cmp.hasOneUse())
return SDValue();
// This only applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
// (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
// Using the proper condition codes (see below), overflow is accounted for.
// FIXME: We can generalize both constraints:
// - XOR/OR/AND (if they were made to survive AtomicExpand)
// - LHS != 1
// if the result is compared.
SDValue CmpLHS = Cmp.getOperand(0);
SDValue CmpRHS = Cmp.getOperand(1);
if (!CmpLHS.hasOneUse())
return SDValue();
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
return SDValue();
const unsigned Opc = CmpLHS.getOpcode();
if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
return SDValue();
SDValue OpRHS = CmpLHS.getOperand(2);
auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
if (!OpRHSC)
return SDValue();
APInt Addend = OpRHSC->getAPIntValue();
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
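// The CMP tested the value loaded *before* the atomic update, while the
// LOCKed instruction sets EFLAGS from the value *after* it, so shift the
// condition by the addend: e.g. (x <s 0) iff (x + 1 <=s 0) over ideal
// integers, and COND_LE consults OF, so a wrap at INT_MAX is still decided
// correctly.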
if (CC == X86::COND_S && Addend == 1)
CC = X86::COND_LE;
else if (CC == X86::COND_NS && Addend == 1)
CC = X86::COND_G;
else if (CC == X86::COND_G && Addend == -1)
CC = X86::COND_GE;
else if (CC == X86::COND_LE && Addend == -1)
CC = X86::COND_L;
else
return SDValue();
SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
return LockOp;
}
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
//
// Simplify the following patterns:
// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
// to (Op EFLAGS Cond)
//
// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
// to (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Quit if not used as a boolean value.
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
// Check CMP operands. One of them should be 0 or 1 and the other should be
// a SetCC or extended from it.
SDValue Op1 = Cmp.getOperand(0);
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
const ConstantSDNode* C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else if ((C = dyn_cast<ConstantSDNode>(Op2)))
SetCC = Op1;
else // Quit if neither operand is a constant.
return SDValue();
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 nor 1.
return SDValue();
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
if (isOneConstant(SetCC.getOperand(0)))
OpIdx = 1;
if (isOneConstant(SetCC.getOperand(1)))
OpIdx = 0;
if (OpIdx < 0)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
// simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
// i.e. it's a comparison against true but the result of SETCC_CARRY is not
// truncated to i1 using 'and'.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(1);
case X86ISD::CMOV: {
// Check whether the false/true values are canonical, i.e. 0 or 1.
ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
// Quit if true value is not a constant.
if (!TVal)
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
if (Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is set if the false condition
// is found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
bool FValIsFalse = true;
if (FVal && FVal->getZExtValue() != 0) {
if (FVal->getZExtValue() != 1)
return SDValue();
// If FVal is 1, opposite cond is needed.
needOppositeCond = !needOppositeCond;
FValIsFalse = false;
}
// Quit if TVal is not the constant opposite of FVal.
if (FValIsFalse && TVal->getZExtValue() != 1)
return SDValue();
if (!FValIsFalse && TVal->getZExtValue() != 0)
return SDValue();
CC = X86::CondCode(SetCC.getConstantOperandVal(2));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(3);
}
}
return SDValue();
}
/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
/// Match:
/// (X86or (X86setcc) (X86setcc))
/// (X86cmp (and (X86setcc) (X86setcc)), 0)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
X86::CondCode &CC1, SDValue &Flags,
bool &isAnd) {
if (Cond->getOpcode() == X86ISD::CMP) {
if (!isNullConstant(Cond->getOperand(1)))
return false;
Cond = Cond->getOperand(0);
}
isAnd = false;
SDValue SetCC0, SetCC1;
switch (Cond->getOpcode()) {
default: return false;
case ISD::AND:
case X86ISD::AND:
isAnd = true;
LLVM_FALLTHROUGH;
case ISD::OR:
case X86ISD::OR:
SetCC0 = Cond->getOperand(0);
SetCC1 = Cond->getOperand(1);
break;
}
// Make sure we have SETCC nodes, using the same flags value.
if (SetCC0.getOpcode() != X86ISD::SETCC ||
SetCC1.getOpcode() != X86ISD::SETCC ||
SetCC0->getOperand(1) != SetCC1->getOperand(1))
return false;
CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
Flags = SetCC0->getOperand(1);
return true;
}
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG) {
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
return combineSetCCAtomicArith(EFLAGS, CC, DAG);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
case X86ISD::BSR:
case X86ISD::BSF:
// If the operand of BSR / BSF is proven never zero, then ZF cannot be set.
if (DAG.isKnownNeverZero(Cond.getOperand(0)))
return (CC == X86::COND_E) ? FalseOp : TrueOp;
}
}
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of the X86
// condition codes.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, DL, MVT::i8));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
}
}
}
// Handle these cases:
// (select (x != c), e, c) -> (select (x != c), e, x),
// (select (x == c), c, e) -> (select (x == c), x, e)
// where the c is an integer constant, and the "select" is the combination
// of CMOV and CMP.
//
// The rationale for this change is that a conditional-move from a constant
// needs two instructions, whereas a conditional-move from a register needs
// only one instruction.
//
// CAVEAT: By replacing a constant with a symbolic value, it may obscure
// some instruction-combining opportunities. This opt needs to be
// postponed as late as possible.
//
if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
// The DCI.xxxx conditions are provided to postpone the optimization as
// late as possible.
ConstantSDNode *CmpAgainst = nullptr;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueOp, FalseOp);
}
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
DAG.getConstant(CC, DL, MVT::i8), Cond };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}
}
// Fold and/or of setcc's to double CMOV:
// (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
// (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
//
// This combine lets us generate:
// cmovcc1 (jcc1 if we don't have CMOV)
// cmovcc2 (same)
// instead of:
// setcc1
// setcc2
// and/or
// cmovne (jne if we don't have CMOV)
// When we can't use the CMOV instruction, it might increase branch
// mispredicts.
// When we can use CMOV, or when there is no mispredict, this improves
// throughput and reduces register pressure.
//
if (CC == X86::COND_NE) {
SDValue Flags;
X86::CondCode CC0, CC1;
bool isAndSetCC;
if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
if (isAndSetCC) {
std::swap(FalseOp, TrueOp);
CC0 = X86::GetOppositeBranchCondition(CC0);
CC1 = X86::GetOppositeBranchCondition(CC1);
}
SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8),
Flags};
SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags};
SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
return CMOV;
}
}
return SDValue();
}
/// Different mul shrinking modes.
enum ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
EVT VT = N->getOperand(0).getValueType();
if (VT.getScalarSizeInBits() != 32)
return false;
assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
unsigned SignBits[2] = {1, 1};
bool IsPositive[2] = {false, false};
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
// DAG.ComputeNumSignBits returns 1 for ISD::ANY_EXTEND, so we need to
// compute the sign bits for it separately.
if (Opd.getOpcode() == ISD::ANY_EXTEND) {
// For anyextend, it is safe to assume an appropriate number of leading
// sign/zero bits.
if (Opd.getOperand(0).getValueType().getVectorElementType() == MVT::i8)
SignBits[i] = 25;
else if (Opd.getOperand(0).getValueType().getVectorElementType() ==
MVT::i16)
SignBits[i] = 17;
else
return false;
IsPositive[i] = true;
} else if (Opd.getOpcode() == ISD::BUILD_VECTOR) {
// All the operands of BUILD_VECTOR need to be integer constants.
// Find the smallest value range which all the operands belong to.
SignBits[i] = 32;
IsPositive[i] = true;
for (const SDValue &SubOp : Opd.getNode()->op_values()) {
if (SubOp.isUndef())
continue;
auto *CN = dyn_cast<ConstantSDNode>(SubOp);
if (!CN)
return false;
APInt IntVal = CN->getAPIntValue();
if (IntVal.isNegative())
IsPositive[i] = false;
SignBits[i] = std::min(SignBits[i], IntVal.getNumSignBits());
}
} else {
SignBits[i] = DAG.ComputeNumSignBits(Opd);
if (Opd.getOpcode() == ISD::ZERO_EXTEND)
IsPositive[i] = true;
}
}
bool AllPositive = IsPositive[0] && IsPositive[1];
unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
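// Thresholds below: an i32 whose value fits in i8 has at least
// 32 - 8 + 1 == 25 known sign bits, an unsigned 8-bit range gives 24,
// i16 gives 17 and an unsigned 16-bit range gives 16.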
// When ranges are from -128 ~ 127, use MULS8 mode.
if (MinSignBits >= 25)
Mode = MULS8;
// When ranges are from 0 ~ 255, use MULU8 mode.
else if (AllPositive && MinSignBits >= 24)
Mode = MULU8;
// When ranges are from -32768 ~ 32767, use MULS16 mode.
else if (MinSignBits >= 17)
Mode = MULS16;
// When ranges are from 0 ~ 65535, use MULU16 mode.
else if (AllPositive && MinSignBits >= 16)
Mode = MULU16;
else
return false;
return true;
}
/// When the operands of a vector mul are extended from smaller size values,
/// like i8 and i16, the type of the mul may be shrunk to generate more
/// efficient code. Two typical patterns are handled:
/// Pattern1:
/// %2 = sext/zext <N x i8> %1 to <N x i32>
/// %4 = sext/zext <N x i8> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// Pattern2:
/// %2 = zext/sext <N x i16> %1 to <N x i32>
/// %4 = zext/sext <N x i16> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// There are four mul shrinking modes:
/// If %2 == sext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// -128 to 127, and the scalar value range of %4 is also -128 to 127,
/// generate pmullw+sext32 for it (MULS8 mode).
/// If %2 == zext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// 0 to 255, and the scalar value range of %4 is also 0 to 255,
/// generate pmullw+zext32 for it (MULU8 mode).
/// If %2 == sext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
/// generate pmullw+pmulhw for it (MULS16 mode).
/// If %2 == zext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// 0 to 65535, and the scalar value range of %4 is also 0 to 65535,
/// generate pmullw+pmulhuw for it (MULU16 mode).
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Check for legality:
// pmullw/pmulhw require SSE2; they are not available with SSE1 alone.
if (!Subtarget.hasSSE2())
return SDValue();
// Check for profitability
// pmulld is supported since SSE4.1. It is better to use pmulld
// instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
// the expansion.
bool OptForMinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
return SDValue();
ShrinkMode Mode;
if (!canReduceVMulWidth(N, DAG, Mode))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getOperand(0).getValueType();
unsigned RegSize = 128;
MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements());
// Shrink the operands of mul.
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
if (VT.getVectorNumElements() >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
if (Mode == MULU8 || Mode == MULS8) {
return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
DL, VT, MulLo);
} else {
MVT ResVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
// Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
// the higher part is also needed.
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result.
// Generate shuffle functioning as punpcklwd.
SmallVector<int, 16> ShuffleMask(VT.getVectorNumElements());
for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) {
ShuffleMask[2 * i] = i;
ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements();
}
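// e.g. with 8 elements per vector the mask is <0,8,1,9,2,10,3,11>, so lane
// i of MulLo supplies the low 16 bits and lane i of MulHi the high 16 bits
// of each resulting i32 product.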
SDValue ResLo =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResLo = DAG.getNode(ISD::BITCAST, DL, ResVT, ResLo);
// Generate shuffle functioning as punpckhwd.
for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) {
ShuffleMask[2 * i] = i + VT.getVectorNumElements() / 2;
ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements() * 3 / 2;
}
SDValue ResHi =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResHi = DAG.getNode(ISD::BITCAST, DL, ResVT, ResHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
} else {
// When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
// to legalize the mul explicitly because implicit legalization for type
// <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
// instructions which will not exist when we explicitly legalize it by
// extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
// <4 x i16> undef).
//
// Legalize the operands of mul.
// FIXME: We may be able to handle non-concatenated vectors by insertion.
unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
if ((RegSize % ReducedSizeInBits) != 0)
return SDValue();
SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
DAG.getUNDEF(ReducedVT));
Ops[0] = NewN0;
NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
Ops[0] = NewN1;
NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
if (Mode == MULU8 || Mode == MULS8) {
// Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
// part is needed.
SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
// Convert the type of the mul result to VT.
MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
: ISD::SIGN_EXTEND_VECTOR_INREG,
DL, ResVT, Mul);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
} else {
// Generate the lower and higher part of mul: pmulhw/pmulhuw. For
// MULU16/MULS16, both parts are needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
OpsVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result. Make sure the type of mul result is VT.
MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
SDValue Res = DAG.getNode(X86ISD::UNPCKL, DL, OpsVT, MulLo, MulHi);
Res = DAG.getNode(ISD::BITCAST, DL, ResVT, Res);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
}
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
EVT VT, SDLoc DL) {
auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(Mult, DL, VT));
Result = DAG.getNode(ISD::SHL, DL, VT, Result,
DAG.getConstant(Shift, DL, MVT::i8));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
auto combineMulMulAddOrSub = [&](bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(9, DL, VT));
Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
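// Each case below decomposes the constant into factors that map onto LEA
// scales (x*3, x*5, x*9 are single LEAs) plus at most one extra shift or
// add/sub, which is usually cheaper than a full imul when LEA is fast.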
switch (MulAmt) {
default:
break;
case 11:
// mul x, 11 => add ((shl (mul x, 5), 1), x)
return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
case 21:
// mul x, 21 => add ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
case 22:
// mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
case 19:
// mul x, 19 => sub ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
case 13:
// mul x, 13 => add ((shl (mul x, 3), 2), x)
return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
case 23:
// mul x, 23 => sub ((shl (mul x, 3), 3), x)
return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
case 14:
// mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
case 26:
// mul x, 26 => sub ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ false);
case 28:
// mul x, 28 => add ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ true);
case 29:
// mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(/*isAdd*/ true));
case 30:
// mul x, 30 => sub (sub ((shl x, 5), x), x)
return DAG.getNode(
ISD::SUB, DL, VT,
DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(5, DL, MVT::i8)),
N->getOperand(0)),
N->getOperand(0));
}
return SDValue();
}
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
if (!MulConstantOptimization)
return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
if (VT != MVT::i64 && VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
uint64_t MulAmt = C->getZExtValue();
if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
return SDValue();
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
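// Try to split the constant into two LEA-scale-friendly factors, e.g.
// 45 == 9 * 5 becomes two LEAs and 24 == 3 * 8 becomes an LEA plus a shift.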
if ((MulAmt % 9) == 0) {
MulAmt1 = 9;
MulAmt2 = MulAmt / 9;
} else if ((MulAmt % 5) == 0) {
MulAmt1 = 5;
MulAmt2 = MulAmt / 5;
} else if ((MulAmt % 3) == 0) {
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
SDLoc DL(N);
SDValue NewMul;
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
// If the second multiplier is pow2, issue it first. We want the multiply
// by 3, 5, or 9 to be folded into the addressing mode unless the lone use
// is an add.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, DL, VT));
if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
} else if (!Subtarget.slowLEA())
NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
if (!NewMul) {
assert(MulAmt != 0 &&
MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
int64_t SignMulAmt = C->getSExtValue();
if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
(SignMulAmt != -INT64_MAX)) {
int NumSign = SignMulAmt > 0 ? 1 : -1;
bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
if (IsPowerOf2_64PlusOne) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
ISD::ADD, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
MVT::i8)));
} else if (IsPowerOf2_64MinusOne) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
NewMul = DAG.getNode(
ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
MVT::i8)),
N->getOperand(0));
}
// To negate, subtract the number from zero
if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
NewMul =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
}
}
if (NewMul)
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
return SDValue();
}
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
if (VT.isInteger() && !VT.isVector() &&
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask <<= N1C->getAPIntValue();
bool MaskOK = false;
// We can handle cases concerning bit-widening nodes containing setcc_c if
// we carefully interrogate the mask to make sure we are preserving
// semantics.
// The transform is not safe if the result of C1 << C2 exceeds the bitwidth
// of the underlying setcc_c operation if the setcc_c was zero extended.
// Consider the following example:
// zext(setcc_c) -> i32 0x0000FFFF
// c1 -> i32 0x0000FFFF
// c2 -> i32 0x00000001
// (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE
// (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
}
if (MaskOK && Mask != 0) {
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
}
}
// Hardware support for vector shifts is sparse which makes us scalarize the
// vector operations in many cases. Also, on Sandy Bridge ADD is faster
// than SHL.
// (shl V, 1) -> add V,V
if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
assert(N0.getValueType().isVector() && "Invalid vector shift type");
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
if (N1SplatC->getAPIntValue() == 1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
return SDValue();
}
static SDValue combineShiftRightAlgebraic(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Size = VT.getSizeInBits();
// fold (ashr (shl a, [56,48,32,24,16]), SarConst)
// into (shl (sext_inreg a), [56,48,32,24,16] - SarConst) or
// into (sra (sext_inreg a), SarConst - [56,48,32,24,16])
// depending on the sign of (SarConst - [56,48,32,24,16]).
// sexts on X86 are MOVs. The MOVs have the same code size
// as the above SHIFTs (only a SHIFT by 1 has lower code size).
// However the MOVs have 2 advantages over a SHIFT:
// 1. MOVs can write to a register that differs from the source.
// 2. MOVs accept memory operands.
if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
N0.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
EVT CVT = N1.getValueType();
if (SarConst.isNegative())
return SDValue();
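// e.g. with VT == i32: (sra (shl X, 24), 25) becomes
// (sra (sext_inreg X, i8), 1), while (sra (shl X, 24), 23) becomes
// (shl (sext_inreg X, i8), 1).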
for (MVT SVT : MVT::integer_valuetypes()) {
unsigned ShiftSize = SVT.getSizeInBits();
// Skip types without a corresponding sext/zext and any ShlConst
// that is not one of [56,48,32,24,16].
if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
continue;
SDLoc DL(N);
SDValue NN =
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
SarConst = SarConst - (Size - ShiftSize);
if (SarConst == 0)
return NN;
else if (SarConst.isNegative())
return DAG.getNode(ISD::SHL, DL, VT, NN,
DAG.getConstant(-SarConst, DL, CVT));
else
return DAG.getNode(ISD::SRA, DL, VT, NN,
DAG.getConstant(SarConst, DL, CVT));
}
return SDValue();
}
/// \brief Returns a vector of 0s if the input node is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
(!Subtarget.hasInt256() ||
(VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
SDValue Amt = N->getOperand(1);
SDLoc DL(N);
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
const APInt &ShiftAmt = AmtSplat->getAPIntValue();
unsigned MaxAmount =
VT.getSimpleVT().getScalarSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as an 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
}
return SDValue();
}
static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (N->getOpcode() == ISD::SHL)
if (SDValue V = combineShiftLeft(N, DAG))
return V;
if (N->getOpcode() == ISD::SRA)
if (SDValue V = combineShiftRightAlgebraic(N, DAG))
return V;
// Try to fold this logical shift into a zero vector.
if (N->getOpcode() != ISD::SRA)
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
return V;
return SDValue();
}
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
X86ISD::VSRLI == Opcode) &&
"Unexpected shift opcode");
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
if (LogicalShift)
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
else
ShiftVal = NumBitsPerElt - 1;
}
// Shift N0 by zero -> N0.
if (!ShiftVal)
return N0;
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
// fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
// This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
// TODO - support other sra opcodes as needed.
if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
N0.getOpcode() == X86ISD::VSRAI)
return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
// We can decode 'whole byte' logical bit shifts as shuffles.
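// For example, (VSRLI v2i64 X, 8) moves each 64-bit element right by one
// whole byte, which the shuffle combiner below can express as a byte
// shuffle that shifts in zeros.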
if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
// Constant Folding.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (N->isOnlyUserOf(N0.getNode()) &&
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
unsigned ShiftImm = ShiftVal.getZExtValue();
for (APInt &Elt : EltBits) {
if (X86ISD::VSHLI == Opcode)
Elt <<= ShiftImm;
else if (X86ISD::VSRAI == Opcode)
Elt.ashrInPlace(ShiftImm);
else
Elt.lshrInPlace(ShiftImm);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
return SDValue();
}
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(
((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
(N->getOpcode() == X86ISD::PINSRW &&
N->getValueType(0) == MVT::v8i16)) &&
"Unexpected vector insertion");
// Attempt to combine PINSRB/PINSRW patterns to a shuffle.
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget);
return SDValue();
}
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
/// OR -> CMPNEQSS.
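/// For example, with f32 operands and no users that want EFLAGS:
///   (and (setcc COND_E, (cmp a, b)), (setcc COND_NP, (cmp a, b)))
/// becomes, roughly, the low bit of (FSETCC a, b, 0): a CMPEQSS-style mask
/// bitcast to integer, ANDed with 1 and truncated to i8.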
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
SDValue CMP1 = N1->getOperand(1);
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getConstant(x86cc, DL, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0),
FSetCC, DAG.getIntPtrConstant(0, DL));
}
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, DL,
MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
// OnesOrZeroesF is all ones or all zeroes, we don't need all the
// bits, but can do this little dance to extract the lowest 32 bits
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
OnesOrZeroesF);
SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
}
SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
return SDValue();
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
if (N1.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
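// For example:
//   (zext (and (trunc (v8i32 X)), (trunc (v8i32 Y))))
// is rewritten to perform the AND directly on the wide v8i32 values and
// then mask the result, avoiding a YMM->XMM->YMM round trip.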
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector())
return SDValue();
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow->getValueType(0);
if (!NarrowVT.is128BitVector())
return SDValue();
if (Narrow->getOpcode() != ISD::XOR &&
Narrow->getOpcode() != ISD::AND &&
Narrow->getOpcode() != ISD::OR)
return SDValue();
SDValue N0 = Narrow->getOperand(0);
SDValue N1 = Narrow->getOperand(1);
SDLoc DL(Narrow);
// The Left side has to be a trunc.
if (N0.getOpcode() != ISD::TRUNCATE)
return SDValue();
// The type of the truncated inputs.
EVT WideVT = N0->getOperand(0)->getValueType(0);
if (WideVT != VT)
return SDValue();
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
ConstantSDNode *RHSConstSplat = nullptr;
if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
RHSConstSplat = RHSBV->getConstantSplatNode();
if (!RHSTrunc && !RHSConstSplat)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
return SDValue();
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
N1 = DAG.getSplatBuildVector(WideVT, DL, N1);
} else if (RHSTrunc) {
N1 = N1->getOperand(0);
}
// Generate the wide operation.
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
unsigned Opcode = N->getOpcode();
switch (Opcode) {
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
unsigned InBits = NarrowVT.getScalarSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
Mask = Mask.zext(VT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, DL, VT));
}
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
default:
llvm_unreachable("Unexpected opcode");
}
}
/// If both input operands of a logic op are being cast from floating point
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
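/// For example, with SSE2:
///   (xor (i64 (bitcast (f64 A))), (i64 (bitcast (f64 B))))
/// becomes (i64 (bitcast (FXOR A, B))), keeping both values in SSE registers.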
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned FPOpcode = ISD::DELETED_NODE;
if (N->getOpcode() == ISD::AND)
FPOpcode = X86ISD::FAND;
else if (N->getOpcode() == ISD::OR)
FPOpcode = X86ISD::FOR;
else if (N->getOpcode() == ISD::XOR)
FPOpcode = X86ISD::FXOR;
assert(FPOpcode != ISD::DELETED_NODE &&
"Unexpected input node for FP logic conversion");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
((Subtarget.hasSSE1() && VT == MVT::i32) ||
(Subtarget.hasSSE2() && VT == MVT::i64))) {
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
EVT N00Type = N00.getValueType();
EVT N10Type = N10.getValueType();
if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
return DAG.getBitcast(VT, FPLogic);
}
}
return SDValue();
}
/// If this is a zero/all-bits result that is bitwise-anded with a low-bits
/// mask (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
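/// For example, if every element of X is known to be all-ones or zero, then
/// for v4i32:
///   (and X, splat(1)) -> (VSRLI X, 31)
/// which drops the load of the splat(1) constant mask.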
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
EVT VT0 = Op0.getValueType();
EVT VT1 = Op1.getValueType();
if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal,
+ /*AllowShrink*/false) ||
!SplatVal.isMask())
return SDValue();
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
unsigned EltBitWidth = VT0.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
// Create BEXTR instructions
// BEXTR is ((X >> imm) & (2**size-1))
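// For example, with BMI:
//   (and (srl X, 8), 0xFF) -> (BEXTR X, 0x808)
// i.e. start bit 8, length 8, encoded as (Shift | (MaskSize << 8)).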
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (!Subtarget.hasBMI() && !Subtarget.hasTBM())
return SDValue();
if (N0.getOpcode() != ISD::SRA && N0.getOpcode() != ISD::SRL)
return SDValue();
ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (MaskNode && ShiftNode) {
uint64_t Mask = MaskNode->getZExtValue();
uint64_t Shift = ShiftNode->getZExtValue();
if (isMask_64(Mask)) {
uint64_t MaskSize = countPopulation(Mask);
if (Shift + MaskSize <= VT.getSizeInBits())
return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
DAG.getConstant(Shift | (MaskSize << 8), DL,
VT));
}
}
return SDValue();
}
// Try to fold:
// (or (and (m, y), (pandn m, x)))
// into:
// (vselect m, x, y)
// As a special case, try to fold:
// (or (and (m, (sub 0, x)), (pandn m, x)))
// into:
// (sub (xor X, M), M)
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256())))
return SDValue();
// Canonicalize AND to LHS.
if (N1.getOpcode() == ISD::AND)
std::swap(N0, N1);
// TODO: Attempt to match against AND(XOR(-1,X),Y) as well, waiting for
// ANDNP combine allows other combines to happen that prevent matching.
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP)
return SDValue();
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
// Check to see if the mask appeared in both the AND and ANDNP.
if (!Y.getNode())
return SDValue();
// Validate that X, Y, and Mask are bitcasts, and see through them.
Mask = peekThroughBitcasts(Mask);
X = peekThroughBitcasts(X);
Y = peekThroughBitcasts(Y);
EVT MaskVT = Mask.getValueType();
unsigned EltBits = MaskVT.getScalarSizeInBits();
// TODO: Attempt to handle floating point cases as well?
if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
return SDValue();
SDLoc DL(N);
// Try to match:
// (or (and (M, (sub 0, X)), (pandn M, X)))
// which is a special case of vselect:
// (vselect M, (sub 0, X), X)
// Per:
// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
// We know that, if fNegate is 0 or 1:
// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
//
// Here, we have a mask, M (all 1s or all 0s), and, similarly, we know that:
// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
// ( M ? -X : X) == ((X ^ M ) + (M & 1))
// This lets us transform our vselect to:
// (add (xor X, M), (and M, 1))
// And further to:
// (sub (xor X, M), M)
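// For example, with M == <-1, 0> and X == <5, 7>:
//   (xor X, M)          == <-6, 7>
//   (sub (xor X, M), M) == <-5, 7>
// which is exactly (vselect M, (sub 0, X), X).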
if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
auto IsNegV = [](SDNode *N, SDValue V) {
return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
};
SDValue V;
if (IsNegV(Y.getNode(), X))
V = X;
else if (IsNegV(X.getNode(), Y))
V = Y;
if (V) {
SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
SDValue SubOp2 = Mask;
// If the negate was on the false side of the select, then
// the operands of the SUB need to be swapped. PR 27251.
// This is because the pattern being matched above is
// (vselect M, (sub (0, X), X) -> (sub (xor X, M), M)
// but if the pattern matched was
// (vselect M, X, (sub (0, X))), that is really negation of the pattern
// above, -(vselect M, (sub 0, X), X), and therefore the replacement
// pattern also needs to be a negation of the replacement pattern above.
// And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
// sub accomplishes the negation of the replacement pattern.
if (V == Y)
std::swap(SubOp1, SubOp2);
SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
return DAG.getBitcast(VT, Res);
}
}
// PBLENDVB is only available on SSE 4.1.
if (!Subtarget.hasSSE41())
return SDValue();
MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
Mask = DAG.getBitcast(BlendVT, Mask);
Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
return DAG.getBitcast(VT, Mask);
}
// Helper function for combineOrCmpEqZeroToCtlzSrl
// Transforms:
// seteq(cmp x, 0)
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
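// For example, for a 32-bit x, ctlz(x) == 32 only when x == 0, so bit 5 of
// ctlz(x) is exactly the (x == 0) predicate:
//   seteq(cmp x, 0) -> srl(ctlz x, 5)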
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDLoc dl(Op);
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
// The result of the shift is true or false, and on X86, the 32-bit
// encoding of shr and lzcnt is more desirable.
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, VT));
return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
}
// Try to transform:
// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0))))
// into:
// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)))
// Will also attempt to match more generic cases, eg:
// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0)))
// Only applies if the target supports the FastLZCNT feature.
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
return SDValue();
auto isORCandidate = [](SDValue N) {
return (N->getOpcode() == ISD::OR && N->hasOneUse());
};
// Check that the zero extend is extending to 32 bits or more. The code generated by
// srl(ctlz) for 16-bit or less variants of the pattern would require extra
// instructions to clear the upper bits.
if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
!isORCandidate(N->getOperand(0)))
return SDValue();
// Check the node matches: setcc(eq, cmp 0)
auto isSetCCCandidate = [](SDValue N) {
return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
N->getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(N->getOperand(1).getOperand(1)) &&
N->getOperand(1).getValueType().bitsGE(MVT::i32);
};
SDNode *OR = N->getOperand(0).getNode();
SDValue LHS = OR->getOperand(0);
SDValue RHS = OR->getOperand(1);
// Save nodes matching or(or, setcc(eq, cmp 0)).
SmallVector<SDNode *, 2> ORNodes;
while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
(isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
ORNodes.push_back(OR);
OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
}
// The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)).
if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
!isORCandidate(SDValue(OR, 0)))
return SDValue();
// We have a or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it
// to
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
EVT VT = OR->getValueType(0);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
SDValue Ret, NewRHS;
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
if (!Ret)
return SDValue();
// Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
while (ORNodes.size() > 0) {
OR = ORNodes.pop_back_val();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
EVT VT = OR->getValueType(0);
SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
}
if (Ret)
Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
return Ret;
}
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
// series of shifts/or that would otherwise be generated.
// Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
// have higher latencies and we are not optimizing for size.
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
if (ShAmt0.getOpcode() == ISD::SUB ||
ShAmt0.getOpcode() == ISD::XOR) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
}
}
}
return SDValue();
}
/// Generate NEG and CMOV for integer abs.
static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since X86 does not have CMOV for 8-bit integer, we don't convert
// 8-bit integer abs to NEG and CMOV.
if (VT.isInteger() && VT.getSizeInBits() == 8)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
// and change it to SUB and CMOV.
if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) {
auto *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (Y1C && Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
// Generate SUB & CMOV.
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
SDValue Ops[] = {N0.getOperand(0), Neg,
DAG.getConstant(X86::COND_GE, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
}
}
return SDValue();
}
/// Try to turn tests against the signbit in the form of:
/// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
/// into:
/// SETGT(X, -1)
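/// For example, for i32 X:
///   (xor (trunc (srl X, 31)), 1)
/// tests that the sign bit of X is clear, which is the same predicate as
/// (setgt X, -1).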
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
// This is only worth doing if the output type is i8 or i1.
EVT ResultType = N->getValueType(0);
if (ResultType != MVT::i8 && ResultType != MVT::i1)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// We should be performing an xor against a truncated shift.
if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
return SDValue();
// Make sure we are performing an xor against one.
if (!isOneConstant(N1))
return SDValue();
// SetCC on x86 zero extends so only act on this if it's a logical shift.
SDValue Shift = N0.getOperand(0);
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
return SDValue();
// Make sure we are truncating from one of i16, i32 or i64.
EVT ShiftTy = Shift.getValueType();
if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
return SDValue();
// Make sure the shift amount extracts the sign bit.
if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
return SDValue();
// Create a greater-than comparison against -1.
// N.B. Using SETGE against 0 works, but we want a canonical-looking
// comparison; using SETGT matches up with what TranslateX86CC produces.
SDLoc DL(N);
SDValue ShiftOp = Shift.getOperand(0);
EVT ShiftOpTy = ShiftOp.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), ResultType);
SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp,
DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
if (SetCCResultType != ResultType)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond);
return Cond;
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// pcmpgt X, -1
///
/// This should be called before type legalization because the pattern may not
/// persist after that.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isSimple())
return SDValue();
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break;
case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
}
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftBV = dyn_cast<BuildVectorSDNode>(Shift.getOperand(1));
if (!ShiftBV)
return SDValue();
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
auto *ShiftAmt = ShiftBV->getConstantSplatNode();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
/// Check if truncation with saturation form type \p SrcVT to \p DstVT
/// is valid for the given \p Subtarget.
static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX512())
return false;
// FIXME: Scalar type may be supported if we move it to vector register.
if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
return false;
EVT SrcElVT = SrcVT.getScalarType();
EVT DstElVT = DstVT.getScalarType();
if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
return false;
if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
return false;
if (SrcVT.is512BitVector() || Subtarget.hasVLX())
return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
return false;
}
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
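/// For example, when truncating v16i32 -> v16i8:
///   (trunc (umin X, splat(255)))
/// clamps each element to the destination range first, so X can be fed
/// directly to an unsigned-saturating truncate such as VPMOVUSDB.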
static SDValue detectUSatPattern(SDValue In, EVT VT) {
if (In.getOpcode() != ISD::UMIN)
return SDValue();
// Saturation with truncation. We truncate from InVT to VT.
assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
"Unexpected types for truncate operation");
APInt C;
- if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C,
+ /*AllowShrink*/false)) {
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
// the element size of the destination type.
return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
SDValue();
}
return SDValue();
}
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// The types should allow use of the VPMOVUS* instructions on AVX512.
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
const X86Subtarget &Subtarget) {
if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return SDValue();
return detectUSatPattern(In, VT);
}
static SDValue
combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
return SDValue();
if (auto USatVal = detectUSatPattern(In, VT))
if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
return SDValue();
}
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
/// X86ISD::AVG instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
if (!VT.isVector() || !VT.isSimple())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in the AVG pattern and it should be
// wider than the original input type (i8/i16).
EVT InScalarVT = InVT.getVectorElementType();
if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
if (Subtarget.hasBWI()) {
if (VT.getSizeInBits() > 512)
return SDValue();
} else if (Subtarget.hasAVX2()) {
if (VT.getSizeInBits() > 256)
return SDValue();
} else {
if (VT.getSizeInBits() > 128)
return SDValue();
}
// Detect the following pattern:
//
// %1 = zext <N x i8> %a to <N x i32>
// %2 = zext <N x i8> %b to <N x i32>
// %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
// %4 = add nuw nsw <N x i32> %3, %2
// %5 = lshr <N x i32> %4, <i32 1 x N>
// %6 = trunc <N x i32> %5 to <N x i8>
//
// In AVX512, the last instruction can also be a trunc store.
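// For example, with i8 elements a == 250 and b == 251:
//   (250 + 251 + 1) >> 1 == 251
// computed in the wider type, which matches PAVGB's rounding average.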
if (In.getOpcode() != ISD::SRL)
return SDValue();
// A lambda checking that the given SDValue is a constant vector and each
// element is in the range [Min, Max].
auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || !BV->isConstant())
return false;
for (SDValue Op : V->ops()) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return false;
uint64_t Val = C->getZExtValue();
if (Val < Min || Val > Max)
return false;
}
return true;
};
// Check if each element of the vector is left-shifted by one.
auto LHS = In.getOperand(0);
auto RHS = In.getOperand(1);
if (!IsConstVectorInRange(RHS, 1, 1))
return SDValue();
if (LHS.getOpcode() != ISD::ADD)
return SDValue();
// Detect a pattern of a + b + 1 where the order doesn't matter.
SDValue Operands[3];
Operands[0] = LHS.getOperand(0);
Operands[1] = LHS.getOperand(1);
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
Operands[0].getOperand(0).getValueType() == VT) {
// The pattern is detected. Subtract one from the constant vector, then
// demote it and emit X86ISD::AVG instruction.
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
Operands[1]);
}
if (Operands[0].getOpcode() == ISD::ADD)
std::swap(Operands[0], Operands[1]);
else if (Operands[1].getOpcode() != ISD::ADD)
return SDValue();
Operands[2] = Operands[1].getOperand(0);
Operands[1] = Operands[1].getOperand(1);
// Now we have three operands of two additions. Check that one of them is a
// constant vector with ones, and the other two are promoted from i8/i16.
for (int i = 0; i < 3; ++i) {
if (!IsConstVectorInRange(Operands[i], 1, 1))
continue;
std::swap(Operands[i], Operands[2]);
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
Operands[j].getOperand(0).getValueType() != VT)
return SDValue();
// The pattern is detected, emit X86ISD::AVG instruction.
return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
Operands[1].getOperand(0));
}
return SDValue();
}
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
AddressSpace, Alignment, &Fast) && !Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Ptr = Ld->getBasePtr();
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
SDValue Load1 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Alignment, Ld->getMemOperand()->getFlags());
Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
SDValue Load2 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
std::min(16U, Alignment), Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
SDValue NewVec = DAG.getUNDEF(RegVT);
NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);
NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);
return DCI.CombineTo(N, NewVec, TF, true);
}
return SDValue();
}
/// If V is a build vector of boolean constants and exactly one of those
/// constants is true, return the operand index of that true element.
/// Otherwise, return -1.
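/// For example, a mask of <false, true, false, false> returns 1, while a
/// mask with two (or zero) true elements returns -1.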
static int getOneTrueElt(SDValue V) {
// This needs to be a build vector of booleans.
// TODO: Checking for the i1 type matches the IR definition for the mask,
// but the mask check could be loosened to i8 or other types. That might
// also require checking more than 'allOnesValue'; eg, the x86 HW
// instructions only require that the MSB is set for each mask element.
// The ISD::MSTORE comments/definition do not specify how the mask operand
// is formatted.
auto *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1)
return -1;
int TrueIndex = -1;
unsigned NumElts = BV->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
const SDValue &Op = BV->getOperand(i);
if (Op.isUndef())
continue;
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
if (!ConstNode)
return -1;
if (ConstNode->getAPIntValue().isAllOnesValue()) {
// If we already found a one, this is too many.
if (TrueIndex >= 0)
return -1;
TrueIndex = i;
}
}
return TrueIndex;
}
/// Given a masked memory load/store operation, return true if it has one mask
/// bit set. If it has one mask bit set, then also return the memory address of
/// the scalar element to load/store, the vector index to insert/extract that
/// scalar element, and the alignment for the scalar memory access.
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
SelectionDAG &DAG, SDValue &Addr,
SDValue &Index, unsigned &Alignment) {
int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
if (TrueMaskElt < 0)
return false;
// Get the address of the one scalar element that is specified by the mask
// using the appropriate offset from the base pointer.
EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
Addr = MaskedOp->getBasePtr();
if (TrueMaskElt != 0) {
unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
Addr = DAG.getMemBasePlusOffset(Addr, Offset, SDLoc(MaskedOp));
}
Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize());
return true;
}
/// If exactly one element of the mask is set for a non-extending masked load,
/// it is a scalar load and vector insert.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
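/// For example, a masked load of v4f32 with mask <0,0,1,0> becomes a scalar
/// load of element 2 followed by an INSERT_VECTOR_ELT into the pass-through
/// value.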
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
unsigned Alignment;
if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment))
return SDValue();
// Load the one scalar element that is specified by the mask using the
// appropriate offset from the base pointer.
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDValue Load =
DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(),
Alignment, ML->getMemOperand()->getFlags());
// Insert the loaded element into the appropriate place in the vector.
SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
Load, VecIndex);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
return SDValue();
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
// If we are loading the first and last elements of a vector, it is safe and
// always faster to load the whole vector. Replace the masked load with a
// vector load and select.
unsigned NumElts = VT.getVectorNumElements();
BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));
bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getSrc0());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
// Convert a masked load with a constant mask into a masked load and a select.
// This allows the select operation to use a faster kind of select instruction
// (for example, vblendvps -> vblendps).
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
if (ML->getSrc0().isUndef())
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMask(), DAG.getUNDEF(VT),
ML->getMemoryVT(), ML->getMemOperand(),
ML->getExtensionType());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getSrc0());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
// TODO: Expanding load with constant mask may be optimized as well.
if (Mld->isExpandingLoad())
return SDValue();
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
return ScalarLoad;
// TODO: Do some AVX512 subsets benefit from this transform?
if (!Subtarget.hasAVX512())
if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
return Blend;
}
if (Mld->getExtensionType() != ISD::SEXTLOAD)
return SDValue();
// Resolve extending loads.
EVT VT = Mld->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
EVT LdVT = Mld->getMemoryVT();
SDLoc dl(Mld);
assert(LdVT != VT && "Cannot extend to the same type");
unsigned ToSz = VT.getScalarSizeInBits();
unsigned FromSz = LdVT.getScalarSizeInBits();
// From/To sizes and ElemCount must be pow of two.
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for extending masked load");
unsigned SizeRatio = ToSz / FromSz;
assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
// Convert Src0 value.
SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
if (!Mld->getSrc0().isUndef()) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), ShuffleVec);
}
// Prepare the new mask.
SDValue NewMask;
SDValue Mask = Mld->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type.
NewMask = DAG.getBitcast(WideVecVT, Mask);
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
ShuffleVec[i] = NumElems * SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
ShuffleVec);
} else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
/// If exactly one element of the mask is set for a non-truncating masked store,
/// it is a vector extract and scalar store.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
SelectionDAG &DAG) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
unsigned Alignment;
if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment))
return SDValue();
// Extract the one scalar element that is actually being stored.
SDLoc DL(MS);
EVT VT = MS->getValue().getValueType();
EVT EltVT = VT.getVectorElementType();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
MS->getValue(), VecIndex);
// Store that element at the appropriate offset from the base pointer.
return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
Alignment, MS->getMemOperand()->getFlags());
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
if (Mst->isCompressingStore())
return SDValue();
if (!Mst->isTruncatingStore())
return reduceMaskedStoreToScalarStore(Mst, DAG);
// Resolve truncating stores.
EVT VT = Mst->getValue().getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegal(VT, StVT))
return SDValue();
// From/To sizes and ElemCount must be pow of two.
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
assert (((NumElems * FromSz) % ToSz) == 0 &&
"Unexpected ratio for truncating masked store");
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
ShuffleVec);
SDValue NewMask;
SDValue Mask = Mst->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type.
NewMask = DAG.getBitcast(WideVecVT, Mask);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
ShuffleVec);
} else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
Mst->getBasePtr(), NewMask, StVT,
Mst->getMemOperand(), false);
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
bool Fast;
unsigned AddressSpace = St->getAddressSpace();
unsigned Alignment = St->getAlignment();
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
AddressSpace, Alignment, &Fast) &&
!Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
Alignment, St->getMemOperand()->getFlags());
SDValue Ch1 =
DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),
std::min(16U, Alignment), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
// Check if we can detect an AVG pattern from the truncation. If yes,
// replace the trunc store by a normal store with the result of X86ISD::AVG
// instruction.
if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
Subtarget, dl))
return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
if (SDValue Val =
detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
dl, Val, St->getBasePtr(),
St->getMemoryVT(), St->getMemOperand(), DAG);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
return SDValue();
// From/To sizes and ElemCount must be pow of two.
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
if (0 != (NumElems * FromSz) % ToSz) return SDValue();
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT))
return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
// Find the largest store unit
MVT StoreType = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
// On 32-bit systems, we can't save 64-bit integers. Try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
(64 <= NumElems * ToSz))
StoreType = MVT::f64;
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
DAG.getIntPtrConstant(i, dl));
SDValue Ch =
DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(),
St->getAlignment(), St->getMemOperand()->getFlags());
Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().getNode();
LoadSDNode *Ld = nullptr;
int TokenFactorIndex = -1;
SmallVector<SDValue, 8> Ops;
SDNode* ChainVal = St->getChain().getNode();
// Must be a store of a load. We currently handle two cases: the load
// is a direct child, and it's under an intervening TokenFactor. It is
// possible to dig deeper under nested TokenFactors.
if (ChainVal == LdVal)
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (!Ld || !ISD::isNormalLoad(Ld))
return SDValue();
// If this is not the MMX case, i.e. we are just turning i64 load/store
// into f64 load/store, avoid the transformation if there are multiple
// uses of the loaded value.
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
// Make sure the new load is placed in the same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
// Otherwise, lower to two pairs of 32-bit loads / stores.
SDValue LoAddr = Ld->getBasePtr();
SDValue HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, LdDL);
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
MinAlign(Ld->getAlignment(), 4),
Ld->getMemOperand()->getFlags());
// Make sure the new loads are placed in the same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
LoAddr = St->getBasePtr();
HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
SDValue LoSt =
DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(),
St->getAlignment(), St->getMemOperand()->getFlags());
SDValue HiSt = DAG.getStore(
NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4),
MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
// This is similar to the above case, but here we handle a scalar 64-bit
// integer store that is extracted from a vector on a 32-bit target.
// If we have SSE2, then we can treat it like a floating-point double
// to get past legalization. The execution dependencies fixup pass will
// choose the optimal machine instruction for the store if this really is
// an integer or v2f32 rather than an f64.
if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue OldExtract = St->getOperand(1);
SDValue ExtOp0 = OldExtract.getOperand(0);
unsigned VecSize = ExtOp0.getValueSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
return SDValue();
}
/// Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
/// of its first operand, then on successive elements of its second operand,
/// returning the resulting values in a vector. For example, if
/// A = < float a0, float a1, float a2, float a3 >
/// and
/// B = < float b0, float b1, float b2, float b3 >
/// then the result of doing a horizontal operation on A and B is
/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
// LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
// RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
// At least one of the operands should be a vector shuffle.
if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
// Handle 128- and 256-bit vector lengths. AVX defines horizontal add/sub to
// operate independently on 128-bit lanes.
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts / NumLanes;
assert((NumLaneElts % 2 == 0) &&
"Vector type should have an even number of elements in each lane");
unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle then pretend it is the shuffle
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: in what follows a default-initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (!LHS.getOperand(0).isUndef())
A = LHS.getOperand(0);
if (!LHS.getOperand(1).isUndef())
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (!LHS.isUndef())
A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (!RHS.getOperand(0).isUndef())
C = RHS.getOperand(0);
if (!RHS.getOperand(1).isUndef())
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (!RHS.isUndef())
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
// Check that the shuffles are both shuffling the same vectors.
if (!(A == C && B == D) && !(A == D && B == C))
return false;
// If everything is UNDEF then bail out: it would be better to fold to UNDEF.
if (!A.getNode() && !B.getNode())
return false;
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
ShuffleVectorSDNode::commuteMask(RMask);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
int LIdx = LMask[i+l], RIdx = RMask[i+l];
// Ignore any UNDEF components.
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
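// For example (illustrative): for v4f32 there is one lane with
// HalfLaneElts == 2. Element i == 0 must read LIdx == 0 and RIdx == 1
// (a0 op a1) and i == 1 must read 2 and 3, while i == 2 and i == 3 switch
// to the second source and must read 4,5 and 6,7, matching the
// <0, 2, 4, 6> / <1, 3, 5, 7> masks shown above.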
unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
}
LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
return true;
}
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, IsFadd)) {
auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned Opcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
// Repeated operand, so we are only trading one output truncation for
// one input truncation.
if (Op0 == Op1)
return true;
// See if either operand has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
unsigned Opcode0 = Op0.getOpcode();
if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
Opcode0 == ISD::ZERO_EXTEND) &&
Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
unsigned Opcode1 = Op1.getOpcode();
if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
Opcode1 == ISD::ZERO_EXTEND) &&
Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
// See if either operand is a single use constant which can be constant
// folded.
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
if (!N->isOnlyUserOf(Src.getNode()))
return SDValue();
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (Opcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
}
return SDValue();
}
/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 ||
Regs[0].getValueType() == MVT::v2i64));
EVT OutVT = N->getValueType(0);
EVT OutSVT = OutVT.getVectorElementType();
EVT InVT = Regs[0].getValueType();
EVT InSVT = InVT.getVectorElementType();
SDLoc DL(N);
// First, use a mask to clear all bits that won't appear in the result.
assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) &&
"OutSVT can only be either i8 or i16.");
APInt Mask =
APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
SDValue MaskVal = DAG.getConstant(Mask, DL, InVT);
for (auto &Reg : Regs)
Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVal, Reg);
MVT UnpackedVT, PackedVT;
if (OutSVT == MVT::i8) {
UnpackedVT = MVT::v8i16;
PackedVT = MVT::v16i8;
} else {
UnpackedVT = MVT::v4i32;
PackedVT = MVT::v8i16;
}
// In each iteration, truncate the type by a half size.
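// For example (illustrative): truncating four v4i32 registers to v16i8
// takes two rounds (RegNum goes 4 -> 2 -> 1). The masking above already
// cleared every bit that cannot appear in an i8 result, so each
// unsigned-saturating X86ISD::PACKUS step is lossless.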
auto RegNum = Regs.size();
for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits();
j < e; j *= 2, RegNum /= 2) {
for (unsigned i = 0; i < RegNum; i++)
Regs[i] = DAG.getBitcast(UnpackedVT, Regs[i]);
for (unsigned i = 0; i < RegNum / 2; i++)
Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2],
Regs[i * 2 + 1]);
}
// If the type of the result is v8i8, we need to do one more X86ISD::PACKUS,
// and then extract a subvector as the result since v8i8 is not a legal type.
if (OutVT == MVT::v8i8) {
Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]);
Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0],
DAG.getIntPtrConstant(0, DL));
return Regs[0];
} else if (RegNum > 1) {
Regs.resize(RegNum);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
} else
return Regs[0];
}
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
SDLoc DL(N);
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
}
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2],
Regs[i * 2 + 1]);
if (Regs.size() > 2) {
Regs.resize(Regs.size() / 2);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
} else
return Regs[0];
}
/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
/// legalization the truncation will be translated into a BUILD_VECTOR whose
/// elements are each extracted from a vector and then truncated, and it is
/// difficult to perform this optimization on that form.
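/// For example (illustrative): on SSE4.1 a v8i32 -> v8i16 truncation is split
/// into two v4i32 halves, each half is masked down to its low 16 bits, and a
/// single X86ISD::PACKUS node reassembles them as v8i16.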
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OutVT = N->getValueType(0);
if (!OutVT.isVector())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = OutVT.getVectorNumElements();
// TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
// SSE2, and we need to take care of it specially.
// AVX512 provides vpmovdb.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX2())
return SDValue();
EVT OutSVT = OutVT.getVectorElementType();
EVT InSVT = InVT.getVectorElementType();
if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
(OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
NumElems >= 8))
return SDValue();
// SSSE3's pshufb results in fewer instructions in the cases below.
if (Subtarget.hasSSSE3() && NumElems == 8 &&
((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
(InSVT == MVT::i32 && OutSVT == MVT::i16)))
return SDValue();
SDLoc DL(N);
// Split a long vector into vectors of legal type.
unsigned RegNum = InVT.getSizeInBits() / 128;
SmallVector<SDValue, 8> SubVec(RegNum);
unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
for (unsigned i = 0; i < RegNum; i++)
SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
DAG.getIntPtrConstant(i * NumSubRegElts, DL));
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
/// This function transforms vector truncation of 'all or none' bits values,
/// i.e. vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32, into X86ISD::PACKSS operations.
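/// For example (illustrative): each lane of a v8i32 vector-compare result is
/// all-ones or all-zeros, i.e. a splatted sign bit, so truncating it to v8i16
/// with X86ISD::PACKSS preserves the values exactly.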
static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Requires SSE2 but AVX512 has fast truncate.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
MVT VT = N->getValueType(0).getSimpleVT();
MVT SVT = VT.getScalarType();
MVT InVT = In.getValueType().getSimpleVT();
MVT InSVT = InVT.getScalarType();
// Use PACKSS if the input is a splatted sign bit.
// e.g. Comparison result, sext_in_reg, etc.
unsigned NumSignBits = DAG.ComputeNumSignBits(In);
if (NumSignBits != InSVT.getSizeInBits())
return SDValue();
// Check we have a truncation suited for PACKSS.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
return SDValue();
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
return SDValue();
return truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget);
}
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
// Try to combine truncation with unsigned saturation.
if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
return Val;
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
SDValue BCSrc = Src.getOperand(0);
if (BCSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
// Try to truncate extended sign bits with PACKSS.
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
return V;
return combineVectorTruncation(N, DAG, Subtarget);
}
/// Returns the negated value if the node \p N flips the sign of an FP value.
///
/// An FP-negation node may have different forms: FNEG(x) or FXOR(x, 0x80000000).
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we look through all bitcasts.
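/// For example (illustrative): on AVX512F a v4f32 FNEG can reach this code as
/// (v4f32 (bitcast (xor (v4i32 (bitcast x)), (v4i32 (bitcast <-0.0 splat>)))))
/// and is recognized here, returning x as the negated value.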
static SDValue isFNEG(SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
return SDValue();
SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
unsigned EltBits = Op1.getScalarValueSizeInBits();
auto isSignMask = [&](const ConstantFP *C) {
return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
};
// There is more than one way to represent the same constant across
// different X86 targets, and the form of the node may also depend on size.
// - load scalar value and broadcast
// - BUILD_VECTOR node
// - load from a constant pool.
// We check all variants here.
if (Op1.getOpcode() == X86ISD::VBROADCAST) {
if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
if (isSignMask(cast<ConstantFP>(C)))
return Op0;
} else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
if (isSignMask(CN->getConstantFPValue()))
return Op0;
} else if (auto *C = getTargetConstantFromNode(Op1)) {
if (C->getType()->isVectorTy()) {
if (auto *SplatV = C->getSplatValue())
if (isSignMask(cast<ConstantFP>(SplatV)))
return Op0;
} else if (auto *FPConst = dyn_cast<ConstantFP>(C))
if (isSignMask(FPConst))
return Op0;
}
return SDValue();
}
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
SDValue Arg = isFNEG(N);
assert(Arg.getNode() && "N is expected to be an FNEG node");
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
SDLoc DL(N);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// If we're negating a FMUL node on a target with FMA, then we can avoid the
// use of a constant by performing (-0 - A*B) instead.
// FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
Arg.getOperand(1), Zero);
return DAG.getBitcast(OrigVT, NewNode);
}
// If we're negating an FMA node, then we can adjust the
// instruction to include the extra negation.
unsigned NewOpcode = 0;
if (Arg.hasOneUse()) {
switch (Arg.getOpcode()) {
case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FMADD; break;
case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break;
// We can't handle a scalar intrinsic node here because it would only
// invert one element and not the whole vector. But we could try to handle
// a negation of the lower element only.
}
}
if (NewOpcode)
return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT,
Arg.getNode()->ops()));
return SDValue();
}
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
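// For example (illustrative): a v4f32 X86ISD::FAND becomes
// (v4f32 (bitcast (and (v2i64 (bitcast A)), (v2i64 (bitcast B))))).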
if (VT.isVector() && Subtarget.hasSSE2()) {
SDLoc dl(N);
MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
unsigned IntOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected FP logic op");
case X86ISD::FOR: IntOpcode = ISD::OR; break;
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
case X86ISD::FAND: IntOpcode = ISD::AND; break;
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
}
return SDValue();
}
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
return Cmp;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
if (Subtarget.hasCMov())
if (SDValue RV = combineIntegerAbs(N, DAG))
return RV;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (isFNEG(N))
return combineFneg(N, DAG, Subtarget);
return SDValue();
}
static bool isNullFPScalarOrVectorConst(SDValue V) {
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
}
/// If a value is a scalar FP zero or a vector FP zero (potentially including
/// undefined elements), return a zero constant that may be used to fold away
/// that value. In the case of a vector, the returned constant will not contain
/// undefined elements even if the input parameter does. This makes it suitable
/// to be used as a replacement operand with operations (e.g., bitwise-and) where
/// an undef should not propagate.
static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!isNullFPScalarOrVectorConst(V))
return SDValue();
if (V.getValueType().isVector())
return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));
return V;
}
static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::f64 && Subtarget.hasSSE2())))
return SDValue();
auto isAllOnesConstantFP = [](SDValue V) {
auto *C = dyn_cast<ConstantFPSDNode>(V);
return C && C->getConstantFPValue()->isAllOnesValue();
};
// fand (fxor X, -1), Y --> fandn X, Y
if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);
// fand X, (fxor Y, -1) --> fandn Y, X
if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FAND(0.0, x) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
return V;
// FAND(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FANDN(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// FANDN(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// F[X]OR(x, 0.0) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
if (isFNEG(N))
if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
// Only perform optimizations if UnsafeMath is used.
if (!DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
// into FMAXC and FMINC, which are commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unknown opcode");
case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
}
return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
}
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Subtarget.useSoftFloat())
return SDValue();
// TODO: Check for global or instruction-level "nnan". In that case, we
// should be able to lower to FMAX/FMIN alone.
// TODO: If an operand is already known to be a NaN or not a NaN, this
// should be an optional swap and FMAX/FMIN.
EVT VT = N->getValueType(0);
if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
(Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
return SDValue();
// This takes at least 3 instructions, so favor a library call when operating
// on a scalar and minimizing code size.
if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
// Op1
// Num NaN
// ----------------
// Num | Max | Op0 |
// Op0 ----------------
// NaN | Op1 | NaN |
// ----------------
//
// The SSE FP max/min instructions were not designed for this case, but rather
// to implement:
// Min = Op1 < Op0 ? Op1 : Op0
// Max = Op1 > Op0 ? Op1 : Op0
//
// So they always return Op0 if either input is a NaN. However, we can still
// use those instructions for fmaxnum by selecting away a NaN input.
// If either operand is NaN, the 2nd source operand (Op0) is passed through.
auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;
SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO);
// If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
// are NaN, the NaN value of Op1 is the result.
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return N->getOperand(1);
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
EVT VT = N->getValueType(0);
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
}
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
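// For example (illustrative): a 32-bit index only has its low
// Log2_32(32) == 5 bits demanded, so a mask such as (and Idx, 31) can be
// simplified away.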
SDValue Op1 = N->getOperand(1);
if (Op1.hasOneUse()) {
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
return SDValue();
}
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
SDLoc dl(N);
// SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE and
// AVX2 since there is no sign-extended shift-right operation on a vector
// with 64-bit elements.
// (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
//   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)) {
SDValue N00 = N0.getOperand(0);
// EXTLOAD has a better solution on AVX2: it may be replaced with an
// X86ISD::VSEXT node.
if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
N00, N1);
return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
}
}
return SDValue();
}
/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
/// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
/// Promoting a sign/zero extension ahead of a no overflow 'add' exposes
/// opportunities to combine math ops, use an LEA, or use a complex addressing
/// mode. This can eliminate extend, add, and shift instructions.
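/// For example (illustrative):
///   (i64 sext (add nsw (i32 X), 5)) --> (i64 add (sext X), 5)
/// where the now sign-extended constant 5 can become the displacement of an
/// LEA.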
static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Ext->getOpcode() != ISD::SIGN_EXTEND &&
Ext->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
// TODO: This should be valid for other integer types.
EVT VT = Ext->getValueType(0);
if (VT != MVT::i64)
return SDValue();
SDValue Add = Ext->getOperand(0);
if (Add.getOpcode() != ISD::ADD)
return SDValue();
bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
bool NSW = Add->getFlags().hasNoSignedWrap();
bool NUW = Add->getFlags().hasNoUnsignedWrap();
// We need an 'add nsw' feeding into the 'sext' or an 'add nuw' feeding
// into the 'zext'.
if ((Sext && !NSW) || (!Sext && !NUW))
return SDValue();
// Having a constant operand to the 'add' ensures that we are not increasing
// the instruction count because the constant is extended for free below.
// A constant operand can also become the displacement field of an LEA.
auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
if (!AddOp1)
return SDValue();
// Don't make the 'add' bigger if there's no hope of combining it with some
// other 'add' or 'shl' instruction.
// TODO: It may be profitable to generate simpler LEA instructions in place
// of single 'add' instructions, but the cost model for selecting an LEA
// currently has a high threshold.
bool HasLEAPotential = false;
for (auto *User : Ext->uses()) {
if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
HasLEAPotential = true;
break;
}
}
if (!HasLEAPotential)
return SDValue();
// Everything looks good, so pull the '{s|z}ext' ahead of the 'add'.
int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue();
SDValue AddOp0 = Add.getOperand(0);
SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
// The wider add is guaranteed to not wrap because both operands are
// sign-extended.
SDNodeFlags Flags;
Flags.setNoSignedWrap(NSW);
Flags.setNoUnsignedWrap(NUW);
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y))) ->
/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)))
/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
/// extends from AH (which we otherwise need to do contortions to access).
static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
auto OpcodeN = N->getOpcode();
auto OpcodeN0 = N0.getOpcode();
if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
(OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
return SDValue();
EVT VT = N->getValueType(0);
EVT InVT = N0.getValueType();
if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32)
return SDValue();
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
: X86ISD::UDIVREM8_ZEXT_HREG;
SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
N0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
return R.getValue(1);
}
/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
/// ZERO_EXTEND_VECTOR_INREG. This requires splitting (or concatenating
/// with UNDEFs) the input into vectors of the same size as the target type,
/// which then extend the lowest elements.
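/// For example (illustrative): sign-extending v16i8 to v16i32 on an SSE4.1
/// (pre-AVX2) target splits the input into four v4i8 pieces; each piece is
/// widened with UNDEFs to 128 bits, sign-extended in-register to v4i32, and
/// the four results are concatenated back into v16i32.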
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
// Input type must be a vector and we must be extending legal integer types.
if (!VT.isVector())
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
// On AVX2+ targets, if the input/output types are both legal then we will be
// able to use SIGN_EXTEND/ZERO_EXTEND directly.
if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
SDLoc DL(N);
auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
Size / InVT.getScalarSizeInBits());
SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
DAG.getUNDEF(InVT));
Opnds[0] = N;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
};
// If the target size is less than 128 bits, extend to a type that would
// extend to 128 bits, extend that and extract the original target vector.
if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) {
unsigned Scale = 128 / VT.getSizeInBits();
EVT ExVT =
EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
DAG.getIntPtrConstant(0, DL));
}
// If the target size is 128 bits (or 256 bits on an AVX2 target, or 512 bits
// on an AVX512 target), convert to ISD::*_EXTEND_VECTOR_INREG, which ensures
// lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41, to let the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
(VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.hasAVX512())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
return Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
}
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
unsigned NumVecs = VT.getSizeInBits() / SplitSize;
unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
SrcVec = Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
};
// On pre-AVX2 targets, split into 128-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
return SplitAndExtendInReg(128);
// On pre-AVX512 targets, split into 256-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
return SplitAndExtendInReg(256);
return SDValue();
}
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = N0.getValueType();
SDLoc DL(N);
if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
return DAG.getSelect(DL, VT, N0, AllOnes, Zero);
}
return SDValue();
}
if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
// Inverting and sign-extending a boolean is the same as zero-extending and
// subtracting 1, because 0 becomes -1 and 1 becomes 0. The subtract is
// efficiently lowered with an LEA or a DEC. This is the same as:
// select Bool, 0, -1.
// sext (xor Bool, -1) --> sub (zext Bool), 1
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
}
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
if (Subtarget.hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
return SDValue();
}
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
return SDValue();
SDValue A = N->getOperand(0);
SDValue B = N->getOperand(1);
SDValue C = N->getOperand(2);
auto invertIfNegative = [](SDValue &V) {
if (SDValue NegVal = isFNEG(V.getNode())) {
V = NegVal;
return true;
}
return false;
};
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
bool NegB = invertIfNegative(B);
bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
// The multiplication is negated when exactly one of NegA and NegB is set.
bool NegMul = (NegA != NegB);
unsigned NewOpcode;
if (!NegMul)
NewOpcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
if (N->getOpcode() == X86ISD::FMADD_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADD_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
}
} else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS1_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break;
}
} else if (N->getOpcode() == X86ISD::FMADDS3_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS3_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
}
} else {
assert((N->getOpcode() == X86ISD::FMADD || N->getOpcode() == ISD::FMA) &&
"Unexpected opcode!");
return DAG.getNode(NewOpcode, dl, VT, A, B, C);
}
return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
}
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
// ISD::SETCC is always legalized to i8.
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
if (!isOneConstant(N0.getOperand(1)))
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, dl, VT));
}
}
if (N0.getOpcode() == ISD::TRUNCATE &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, dl, VT));
}
}
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
if (VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
return R;
return SDValue();
}
/// Try to map a 128-bit or larger integer comparison to vector instructions
/// before type legalization splits it up into chunks.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
// We're looking for an oversized integer equality comparison, but ignore a
// comparison with zero because that gets special treatment in EmitTest().
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
// TODO: Add support for AVX-512.
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2())) {
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
// setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
// setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
MVT::i32);
return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
}
return SDValue();
}
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
EVT OpVT = LHS.getValueType();
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
LHS.hasOneUse()) {
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
// x == 0-y --> x+y == 0
// x != 0-y --> x+y != 0
if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
RHS.hasOneUse()) {
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
return V;
}
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
bool IsSEXT0 =
(LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
if (!IsSEXT0 || !IsVZero1) {
// Swap the operands and update the condition code.
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
}
if (IsSEXT0 && IsVZero1) {
assert(VT == LHS.getOperand(0).getValueType() &&
"Uexpected operand type");
if (CC == ISD::SETGT)
return DAG.getConstant(0, DL, VT);
if (CC == ISD::SETLE)
return DAG.getConstant(1, DL, VT);
if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
"Unexpected condition code!");
return LHS.getOperand(0);
}
}
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
LHS.getValueType() == MVT::v4f32)
return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
return SDValue();
}
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Gather and Scatter instructions use k-registers for masks. The type of
// the masks is v*i1. So the mask will be truncated anyway.
// The SIGN_EXTEND_INREG may be dropped.
SDValue Mask = N->getOperand(2);
if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
NewOps[2] = Mask.getOperand(0);
DAG.UpdateNodeOperands(N, NewOps);
}
return SDValue();
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
return getSETCC(CC, Flags, DL, DAG);
return SDValue();
}
/// Optimize branch condition evaluation.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
}
return SDValue();
}
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize away the operation when it's fed from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT))
return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Since UINT_TO_FP is legal (it's marked custom), the DAG combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known to be zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(Op0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
return SDValue();
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
if (InVT.isVector() &&
(InSVT == MVT::i8 || InSVT == MVT::i16 ||
(InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Without AVX512DQ we only support i64 to float scalar conversion. For both
// vectors and scalars, see if we know that the upper bits are all the sign
// bit, in which case we can truncate the input to i32 and convert from that.
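// For example (illustrative): (sint_to_fp (i64 sext (i32 X))) has at least
// 33 sign bits, so the source can be shrunk to (i32 X) without changing the
// value.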
if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
if (InVT.isVector())
TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
InVT.getVectorNumElements());
SDLoc dl(N);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
}
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16 or f128.
if (VT == MVT::f16 || VT == MVT::f128)
return SDValue();
if (!Ld->isVolatile() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget.is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS
static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// When legalizing carry, we create carries via 'add X, -1'.
// If that comes from an actual carry, via setcc, we use the
// carry directly.
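// For example (illustrative): in (X86ISD::ADD (zext (X86ISD::SETCC COND_B,
// F)), -1) the add merely re-creates the carry held in F, so users of this
// node's flag result can be rewired to F directly.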
if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) {
SDValue Carry = N->getOperand(0);
while (Carry.getOpcode() == ISD::TRUNCATE ||
Carry.getOpcode() == ISD::ZERO_EXTEND ||
Carry.getOpcode() == ISD::SIGN_EXTEND ||
Carry.getOpcode() == ISD::ANY_EXTEND ||
(Carry.getOpcode() == ISD::AND &&
isOneConstant(Carry.getOperand(1))))
Carry = Carry.getOperand(0);
if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
if (Carry.getConstantOperandVal(0) == X86::COND_B)
return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (X86::isZeroNode(N->getOperand(0)) &&
X86::isZeroNode(N->getOperand(1)) &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL,
MVT::i8),
N->getOperand(2)),
DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
return SDValue();
}
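// For example, (adc 0, 0, EFLAGS) whose flag result is unused becomes
// (and (setcc_carry COND_B, EFLAGS), 1), i.e. a plain 0/1 materialization
// of the incoming carry bit.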
/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
/// which is more useful than 0/1 in some cases.
static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
SDLoc DL(N);
// "Condition code B" is also known as "the carry flag" (CF).
SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
MVT VT = N->getSimpleValueType(0);
if (VT == MVT::i8)
return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
assert(VT == MVT::i1 && "Unexpected type for SETCC node");
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
}
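// e.g. with CF = 1, the SETCC_CARRY above lowers to an sbb producing 0xFF in
// an i8 register; the AND with 1 recovers the 0/1 value, while i1 results
// only need a truncate.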
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
bool IsSub = N->getOpcode() == ISD::SUB;
SDValue X = N->getOperand(0);
SDValue Y = N->getOperand(1);
// If this is an add, canonicalize a zext operand to the RHS.
// TODO: Incomplete? What if both sides are zexts?
if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
Y.getOpcode() != ISD::ZERO_EXTEND)
std::swap(X, Y);
// Look through a one-use zext.
bool PeekedThroughZext = false;
if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
Y = Y.getOperand(0);
PeekedThroughZext = true;
}
// If this is an add, canonicalize a setcc operand to the RHS.
// TODO: Incomplete? What if both sides are setcc?
// TODO: Should we allow peeking through a zext of the other operand?
if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
Y.getOpcode() != X86ISD::SETCC)
std::swap(X, Y);
if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
if (ConstantX) {
if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
(IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
// This is a complicated way to get -1 or 0 from the carry flag:
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Y.getOperand(1));
}
if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
(IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
SDValue EFLAGS = Y->getOperand(1);
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
// Swap the operands of a SUB, and we have the same pattern as above.
// -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
// 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
SDValue NewSub = DAG.getNode(
X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
NewEFLAGS);
}
}
}
if (CC == X86::COND_B) {
// X + SETB Z --> X + (mask SBB Z, Z)
// X - SETB Z --> X - (mask SBB Z, Z)
// TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
if (SBB.getValueSizeInBits() != VT.getSizeInBits())
SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
}
if (CC == X86::COND_A) {
SDValue EFLAGS = Y->getOperand(1);
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
if (SBB.getValueSizeInBits() != VT.getSizeInBits())
SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
}
}
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
SDValue Cmp = Y.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
SDValue Z = Cmp.getOperand(0);
EVT ZVT = Z.getValueType();
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
if (ConstantX) {
// 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
// -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
(!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
SDValue Zero = DAG.getConstant(0, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
}
// cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
// with fake operands:
// 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
// -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
(!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1);
}
}
// (cmp Z, 1) sets the carry flag if Z is 0.
SDValue One = DAG.getConstant(1, DL, ZVT);
SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
// Add the flags type for ADC/SBB nodes.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
// X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
DAG.getConstant(-1ULL, DL, VT), Cmp1);
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
DAG.getConstant(0, DL, VT), Cmp1);
}
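// Sanity check of the final ADC form above: (cmp Z, 1) sets CF exactly when
// Z == 0, so (adc X, -1, CF) computes X - 1 + CF, which is X - 1 for Z != 0
// and X for Z == 0 -- matching X - (Z != 0).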
static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue MulOp = N->getOperand(0);
SDValue Phi = N->getOperand(1);
if (MulOp.getOpcode() != ISD::MUL)
std::swap(MulOp, Phi);
if (MulOp.getOpcode() != ISD::MUL)
return SDValue();
ShrinkMode Mode;
if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
EVT VT = N->getValueType(0);
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
unsigned VectorSize = VT.getVectorNumElements() * 16;
// If the vector size is less than 128, or greater than the supported RegSize,
// do not use PMADD.
if (VectorSize < 128 || VectorSize > RegSize)
return SDValue();
SDLoc DL(N);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements());
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
// Shrink the operands of mul.
SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
// Madd vector size is half of the original vector size
SDValue Madd = DAG.getNode(X86ISD::VPMADDWD, DL, MAddVT, N0, N1);
// Fill the rest of the output with 0
SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
}
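// For illustration: for a v8i32 reduction add whose mul operands are known to
// fit in 16 signed bits, the operands are truncated to v8i16 and a single
// VPMADDWD yields a v4i32 partial sum, which is padded with zeroes back to
// v8i32 and added into the accumulator phi.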
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// TODO: There's nothing special about i32, any integer type above i16 should
// work just as well.
if (!VT.isVector() || !VT.isSimple() ||
!(VT.getVectorElementType() == MVT::i32))
return SDValue();
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
// We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (VT.getSizeInBits() / 4 > RegSize)
return SDValue();
// We know N is a reduction add, which means one of its operands is a phi.
// To match SAD, we need the other operand to be a vector select.
SDValue SelectOp, Phi;
if (Op0.getOpcode() == ISD::VSELECT) {
SelectOp = Op0;
Phi = Op1;
} else if (Op1.getOpcode() == ISD::VSELECT) {
SelectOp = Op1;
Phi = Op0;
} else
return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
if (!detectZextAbsDiff(SelectOp, Op0, Op1))
return SDValue();
// SAD pattern detected. Now build a SAD instruction and an addition for
// reduction. Note that the result of SAD has fewer elements than its input,
// so we can only update part of the elements in the reduction vector.
SDValue Sad = createPSADBW(DAG, Op0, Op1, DL);
// The output of PSADBW is a vector of i64.
// We need to turn the vector of i64 into a vector of i32.
// If the reduction vector is at least as wide as the psadbw result, just
// bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
// anyway.
MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
if (VT.getSizeInBits() >= ResVT.getSizeInBits())
Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
else
Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
// Update part of the elements of the reduction vector. This is done by first
// extracting a sub-vector from it, updating this sub-vector, and inserting
// it back.
SDValue SubPhi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Phi,
DAG.getIntPtrConstant(0, DL));
SDValue Res = DAG.getNode(ISD::ADD, DL, ResVT, Sad, SubPhi);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Phi, Res,
DAG.getIntPtrConstant(0, DL));
} else
return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
}
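// For illustration: a v16i32 reduction of zext(|a - b|) over v16i8 inputs
// becomes one PSADBW producing v2i64, which is bitcast to v4i32 and added
// into the low four lanes of the accumulator via extract/insert_subvector.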
/// Convert vector increment or decrement to sub/add with an all-ones constant:
/// add X, <1, 1...> --> sub X, <-1, -1...>
/// sub X, <1, 1...> --> add X, <-1, -1...>
/// The all-ones vector constant can be materialized using a pcmpeq instruction
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");
// Pseudo-legality check: getOnesVector() expects one of these types, so bail
// out and wait for legalization if we have an unsupported vector length.
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+ if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) ||
+ !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
}
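// For example, (add v4i32 X, <1,1,1,1>) becomes (sub X, <-1,-1,-1,-1>); the
// all-ones vector is materialized with pcmpeq, avoiding a constant-pool load
// of the splat-1 vector.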
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasVectorReduction()) {
if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;
if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
return MAdd;
}
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
if (SDValue V = combineIncDecVector(N, DAG))
return V;
return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
// If the RHS of the sub is a XOR with one use and a constant, invert the
// immediate. Then add one to the LHS of the sub so we can turn
// X-Y -> X+~Y+1, saving one register.
if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
isa<ConstantSDNode>(Op1.getOperand(1))) {
APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
Op1.getOperand(0),
DAG.getConstant(~XorC, SDLoc(Op1), VT));
return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT));
}
}
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
if (SDValue V = combineIncDecVector(N, DAG))
return V;
return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
SDLoc DL(N);
unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = SVT.getSizeInBits();
SDValue Op = N->getOperand(0);
MVT OpVT = Op.getSimpleValueType();
MVT OpEltVT = OpVT.getVectorElementType();
unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
unsigned InputBits = OpEltSizeInBits * NumElts;
// Perform any constant folding.
// FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
APInt Undefs(NumElts, 0);
SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
bool IsZEXT =
(Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
Undefs.setBit(i);
continue;
}
Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
: EltBits[i].sextOrTrunc(EltSizeInBits);
}
return getConstVector(Vals, Undefs, VT, DAG, DL);
}
// (vzext (bitcast (vzext (x)) -> (vzext x)
// TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
SDValue V = peekThroughBitcasts(Op);
if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
MVT InnerVT = V.getSimpleValueType();
MVT InnerEltVT = InnerVT.getVectorElementType();
// If the element sizes match exactly, we can just do one larger vzext. This
// is always an exact type match as vzext operates on integer types.
if (OpEltVT == InnerEltVT) {
assert(OpVT == InnerVT && "Types must match for vzext!");
return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
}
// The only other way we can combine them is if only a single element of the
// inner vzext is used in the input to the outer vzext.
if (InnerEltVT.getSizeInBits() < InputBits)
return SDValue();
// In this case, the inner vzext is completely dead because we're going to
// only look at bits inside of the low element. Just do the outer vzext on
// a bitcast of the input to the inner.
return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
}
// Check if we can bypass extracting and re-inserting an element of an input
// vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
// TODO: Add X86ISD::VSEXT support
if (Opcode == X86ISD::VZEXT &&
V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
SDValue OrigV = ExtractedV.getOperand(0);
if (isNullConstant(ExtractedV.getOperand(1))) {
MVT OrigVT = OrigV.getSimpleValueType();
// Extract a subvector if necessary...
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
OrigVT.getVectorNumElements() / Ratio);
OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
DAG.getIntPtrConstant(0, DL));
}
Op = DAG.getBitcast(OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
}
}
return SDValue();
}
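// For example, the constant-folding path above turns a zero-extension of the
// constant vector <1, -1, undef, 3> (i8 elements) into <1, 255, undef, 3>
// (i32 elements), while the sign-extending opcodes would yield
// <1, -1, undef, 3>.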
/// Canonicalize (LSUB p, 1) -> (LADD p, -1).
static SDValue combineLockSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
MVT VT = RHS.getSimpleValueType();
SDLoc DL(N);
auto *C = dyn_cast<ConstantSDNode>(RHS);
if (!C || C->getZExtValue() != 1)
return SDValue();
RHS = DAG.getConstant(-1, DL, VT);
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
return DAG.getMemIntrinsicNode(X86ISD::LADD, DL,
DAG.getVTList(MVT::i32, MVT::Other),
{Chain, LHS, RHS}, VT, MMO);
}
// TEST (AND a, b), (AND a, b) -> TEST a, b
static SDValue combineTestM(SDNode *N, SelectionDAG &DAG) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0 != Op1 || Op1->getOpcode() != ISD::AND)
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
return DAG.getNode(X86ISD::TESTM, DL, VT,
Op0->getOperand(0), Op0->getOperand(1));
}
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
return getOnesVector(VT, DAG, DL);
if (N->getOpcode() == X86ISD::PCMPGT)
return getZeroVector(VT, Subtarget, DAG, DL);
}
return SDValue();
}
static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT OpVT = N->getSimpleValueType(0);
MVT SubVecVT = SubVec.getSimpleValueType();
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subvector operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
SubVec.getOperand(0).getSimpleValueType() == OpVT &&
(IdxVal != 0 || !Vec.isUndef())) {
int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue();
if (ExtIdxVal != 0) {
int VecNumElts = OpVT.getVectorNumElements();
int SubVecNumElts = SubVecVT.getVectorNumElements();
SmallVector<int, 64> Mask(VecNumElts);
// First create an identity shuffle mask.
for (int i = 0; i != VecNumElts; ++i)
Mask[i] = i;
// Now insert the extracted portion.
for (int i = 0; i != SubVecNumElts; ++i)
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
}
}
// Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
// load:
// (insert_subvector (insert_subvector undef, (load16 addr), 0),
// (load16 addr + 16), Elts/2)
// --> load32 addr
// or:
// (insert_subvector (insert_subvector undef, (load32 addr), 0),
// (load32 addr + 32), Elts/2)
// --> load64 addr
// or a 16-byte or 32-byte broadcast:
// (insert_subvector (insert_subvector undef, (load16 addr), 0),
// (load16 addr), Elts/2)
// --> X86SubVBroadcast(load16 addr)
// or:
// (insert_subvector (insert_subvector undef, (load32 addr), 0),
// (load32 addr), Elts/2)
// --> X86SubVBroadcast(load32 addr)
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
if (Idx2 && Idx2->getZExtValue() == 0) {
SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
unsigned AS = FirstLd->getAddressSpace();
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
SDValue Ops[] = {SubVec2, SubVec};
if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
Subtarget, false))
return Ld;
}
}
// If lower/upper loads are the same and the only users of the load, then
// lower to a VBROADCASTF128/VBROADCASTI128/etc.
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode())) {
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
}
}
// If this is subv_broadcast insert into both halves, use a larger
// subv_broadcast.
if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
SubVec.getOperand(0));
}
}
}
return SDValue();
}
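// For example, inserting (extract_subvector X, 4) at lane 4 of a v8f32 Vec
// becomes a single vector shuffle of Vec and X, and inserting one 16-byte
// load into both halves of a 32-byte vector becomes a SUBV_BROADCAST
// (VBROADCASTF128/VBROADCASTI128).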
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:
return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::ADD: return combineX86ADD(N, DAG, DCI);
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return combineShift(N, DAG, DCI, Subtarget);
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VSEXT:
case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPPERM:
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::VZEXT_MOVL:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI, Subtarget);
case X86ISD::FMADD:
case X86ISD::FMADD_RND:
case X86ISD::FMADDS1_RND:
case X86ISD::FMADDS3_RND:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
case X86ISD::LSUB: return combineLockSub(N, DAG, Subtarget);
case X86ISD::TESTM: return combineTestM(N, DAG);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
}
return SDValue();
}
/// Return true if the target has native support for the specified value type
/// and it is 'desirable' to use the type for the given node type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer and
/// some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
if (VT != MVT::i16)
return true;
switch (Opc) {
default:
return true;
case ISD::LOAD:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return false;
}
}
/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
/// we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
if (hasCopyImplyingStackAdjustment(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
}
TargetLoweringBase::finalizeLowering(MF);
}
/// This method queries the target about whether it is beneficial for the DAG
/// combiner to promote the specified node. If true, it should return the
/// desired promotion type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
if (VT != MVT::i16)
return false;
bool Promote = false;
bool Commute = false;
switch (Op.getOpcode()) {
default: break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Promote = true;
break;
case ISD::SHL:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
return false;
Promote = true;
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Commute = true;
LLVM_FALLTHROUGH;
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
if (!Commute && MayFoldLoad(N1))
return false;
// Avoid disabling potential load folding opportunities.
if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
return false;
if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
return false;
Promote = true;
}
}
PVT = MVT::i32;
return Promote;
}
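// For example, an i16 'and' of two values that are not foldable loads is
// promoted to i32: the 32-bit form avoids the 0x66 operand-size prefix, and
// 'and' commutes, so operand order poses no problem. An i16 shift whose input
// load feeds a store is left alone so the load/store folding is not lost.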
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Helper to match a string as a sequence of whitespace-separated pieces.
static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
for (StringRef Piece : Pieces) {
if (!S.startswith(Piece)) // Check if the piece matches.
return false;
S = S.substr(Piece.size());
StringRef::size_type Pos = S.find_first_not_of(" \t");
if (Pos == 0) // We matched a prefix.
return false;
S = S.substr(Pos);
}
return S.empty();
}
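// For example, matchAsm("  bswap   $0", {"bswap", "$0"}) succeeds, while
// matchAsm("bswapper $0", {"bswap", "$0"}) fails: "per" follows the matched
// piece with no intervening whitespace, so it is rejected as a mere prefix.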
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
return true;
}
}
return false;
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
const std::string &AsmStr = IA->getAsmString();
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
(matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
if (Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
}
return false;
}
/// Given a constraint letter, return the type of constraint for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
case 'q':
case 'Q':
case 'f':
case 't':
case 'u':
case 'y':
case 'x':
case 'v':
case 'Y':
case 'l':
return C_RegisterClass;
case 'k': // AVX512 masking registers.
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
return C_Register;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'G':
case 'C':
case 'e':
case 'Z':
return C_Other;
default:
break;
}
}
else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default:
break;
case 'Y':
switch (Constraint[1]) {
default:
break;
case 'k':
return C_Register;
}
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
LLVM_FALLTHROUGH;
case 'R':
case 'q':
case 'Q':
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_SpecificReg;
break;
case 'f':
case 't':
case 'u':
if (type->isFloatingPointTy())
weight = CW_SpecificReg;
break;
case 'y':
if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
case 'Y':
// Other "Y<x>" (e.g. "Yk") constraints should be implemented below.
if (constraint[1] == 'k') {
// Support for 'Yk' (similarly to the 'k' variant below).
weight = CW_SpecificReg;
break;
}
// Else fall through (handle "Y" constraint).
LLVM_FALLTHROUGH;
case 'v':
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
weight = CW_Register;
LLVM_FALLTHROUGH;
case 'x':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
weight = CW_Register;
break;
case 'k':
// Enable conditional vector operations using %k<#> registers.
weight = CW_SpecificReg;
break;
case 'I':
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
weight = CW_Constant;
}
break;
case 'J':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 63)
weight = CW_Constant;
}
break;
case 'K':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
weight = CW_Constant;
}
break;
case 'L':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
weight = CW_Constant;
}
break;
case 'M':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 3)
weight = CW_Constant;
}
break;
case 'N':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xff)
weight = CW_Constant;
}
break;
case 'G':
case 'C':
if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
case 'e':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
weight = CW_Constant;
}
break;
case 'Z':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xffffffff)
weight = CW_Constant;
}
break;
}
return weight;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *X86TargetLowering::
LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
if (Subtarget.hasSSE2())
return "Y";
if (Subtarget.hasSSE1())
return "x";
}
return TargetLowering::LowerXConstraint(ConstraintVT);
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'J':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'L':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'M':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'O':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
}
return;
}
case 'Z': {
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
return;
}
case 'i': {
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates.
if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = nullptr;
int64_t Offset = 0;
// Match either (GA), (GA+C), (GA+C1+C2), etc.
while (true) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
Offset += GA->getOffset();
break;
} else if (Op.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
} else if (Op.getOpcode() == ISD::SUB) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += -C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
}
// Otherwise, this isn't something we can handle, reject it.
return;
}
const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(Subtarget.classifyGlobalReference(GV)))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
GA->getValueType(0), Offset);
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
/// Check if \p RC is a general purpose register class.
/// I.e., GR* or one of their variant.
static bool isGRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::GR8RegClass) ||
RC.hasSuperClassEq(&X86::GR16RegClass) ||
RC.hasSuperClassEq(&X86::GR32RegClass) ||
RC.hasSuperClassEq(&X86::GR64RegClass) ||
RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass);
}
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variant.
static bool isFRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::FR32XRegClass) ||
RC.hasSuperClassEq(&X86::FR64XRegClass) ||
RC.hasSuperClassEq(&X86::VR128XRegClass) ||
RC.hasSuperClassEq(&X86::VR256XRegClass) ||
RC.hasSuperClassEq(&X86::VR512RegClass);
}
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
// Only supported in AVX512 or later.
switch (VT.SimpleTy) {
default: break;
case MVT::i32:
return std::make_pair(0U, &X86::VK32RegClass);
case MVT::i16:
return std::make_pair(0U, &X86::VK16RegClass);
case MVT::i8:
return std::make_pair(0U, &X86::VK8RegClass);
case MVT::i1:
return std::make_pair(0U, &X86::VK1RegClass);
case MVT::i64:
return std::make_pair(0U, &X86::VK64RegClass);
}
}
break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
LLVM_FALLTHROUGH;
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_ABCDRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
return std::make_pair(0U, &X86::GR64RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
return std::make_pair(0U, &X86::RFP80RegClass);
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget.hasSSE2()) break;
LLVM_FALLTHROUGH;
case 'v':
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget.hasSSE1()) break;
bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR32XRegClass);
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR256XRegClass);
return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
return std::make_pair(0U, &X86::VR512RegClass);
}
break;
}
} else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
switch (Constraint[1]) {
default:
break;
case 'k':
// This register class doesn't allocate k0 for masked vector operations.
if (Subtarget.hasAVX512()) { // Only supported in AVX512.
switch (VT.SimpleTy) {
default: break;
case MVT::i32:
return std::make_pair(0U, &X86::VK32WMRegClass);
case MVT::i16:
return std::make_pair(0U, &X86::VK16WMRegClass);
case MVT::i8:
return std::make_pair(0U, &X86::VK8WMRegClass);
case MVT::i1:
return std::make_pair(0U, &X86::VK1WMRegClass);
case MVT::i64:
return std::make_pair(0U, &X86::VK64WMRegClass);
}
}
break;
}
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
// Map st(0) .. st(7) to FP0 .. FP7 in the RFP80 register class.
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' &&
tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
Constraint[5] == ')' &&
Constraint[6] == '}') {
Res.first = X86::FP0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::FP0;
Res.second = &X86::RFP80RegClass;
return Res;
}
// flags -> EFLAGS
if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = &X86::CCRRegClass;
return Res;
}
// 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
if (Subtarget.is64Bit()) {
Res.first = X86::RAX;
Res.second = &X86::GR64_ADRegClass;
} else {
assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
"Expecting 64, 32 or 16 bit subtarget");
Res.first = X86::EAX;
Res.second = &X86::GR32_ADRegClass;
}
return Res;
}
return Res;
}
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::i32 should
// return "eax". This should even work for things like getting 64-bit integer
// registers when given an f64 type.
const TargetRegisterClass *Class = Res.second;
// The generic code will match the first register class that contains the
// given register. Thus, based on the ordering of the tablegened file,
// the "plain" GR classes might not come first.
// Therefore, use a helper method.
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
if (Size == 1) Size = 8;
unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
Res.first = DestReg;
Res.second = Size == 8 ? &X86::GR8RegClass
: Size == 16 ? &X86::GR16RegClass
: Size == 32 ? &X86::GR32RegClass
: &X86::GR64RegClass;
assert(Res.second->contains(Res.first) && "Register in register class");
} else {
// No register found/type mismatch.
Res.first = 0;
Res.second = nullptr;
}
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT))
Res.second = &X86::VR128RegClass;
else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT))
Res.second = &X86::VR256RegClass;
else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
Res.second = &X86::VR512RegClass;
else {
// Type mismatch and not a clobber: return an error.
Res.first = 0;
Res.second = nullptr;
}
}
return Res;
}
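// For example, the constraint "{ax}" with an i32 operand is remapped by the
// code above from AX/GR16 to EAX/GR32, instead of leaving a mismatched
// register/type pair.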
int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// will take 2 allocations in the out of order engine instead of 1
// for plain addressing mode, i.e. inst (reg1).
// E.g.,
// vaddps (%rsi,%rdx), %ymm0, %ymm1
// Requires two allocations (one for the load, one for the computation)
// whereas:
// vaddps (%rsi), %ymm0, %ymm1
// Requires just 1 allocation, i.e., freeing allocations for other operations
// and having less micro operations to execute.
//
// For some X86 architectures, this is even worse because for instance for
// stores, the complex addressing mode forces the instruction to use the
// "load" ports instead of the dedicated "store" port.
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
return -1;
}
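// For example, a legal mode like (%rsi,%rdx,2) returns cost 1 (the scaled
// index needs an extra allocation), plain (%rsi) returns 0, and an illegal
// addressing mode returns -1.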
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
// than the alternative sequence.
// The exception to this is vector division. Since x86 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize =
Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
if (!Subtarget.is64Bit())
return;
// Update IsSplitCSR in X86MachineFunctionInfo.
X86MachineFunctionInfo *AFI =
Entry->getParent()->getInfo<X86MachineFunctionInfo>();
AFI->setIsSplitCSR(true);
}
void X86TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (X86::GR64RegClass.contains(*I))
RC = &X86::GR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
unsigned NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction()->hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
// If the function specifically requests stack probes, emit them.
if (MF.getFunction()->hasFnAttribute("probe-stack"))
return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString();
// Generally, if we aren't on Windows, the platform ABI does not include
// support for stack probes, so don't emit them.
if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO())
return "";
// We need a stack probe to conform to the Windows ABI. Choose the right
// symbol.
if (Subtarget.is64Bit())
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
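// For example, a 64-bit MinGW target probes with ___chkstk_ms and a 64-bit
// MSVC target with __chkstk, while 32-bit targets use _alloca (CygMing) or
// _chkstk (MSVC); non-Windows and Mach-O targets emit no probe unless the
// function carries a "probe-stack" attribute.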
Index: head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td (revision 322855)
@@ -1,10244 +1,10244 @@
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
string suffix = ""> {
RegisterClass RC = rc;
ValueType EltVT = eltvt;
int NumElts = numelts;
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
// Corresponding write-mask register class.
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask VT.
ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
// VTName is a string name for the vector VT. For vector types it will be
// v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
// It is slightly more involved for scalar types, where NumElts = 1:
// in that case we build v4f32 or v2f64.
string VTName = "v" # !if (!eq (NumElts, 1),
!if (!eq (EltVT.Size, 32), 4,
!if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The vector VT.
ValueType VT = !cast<ValueType>(VTName);
string EltTypeName = !cast<string>(EltVT);
// Size of the element type in bits, e.g. 32 for v16i32.
string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
int EltSize = EltVT.Size;
// "i" for integer types and "f" for floating-point types
string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
// Size of RC in bits, e.g. 512 for VR512.
int Size = VT.Size;
// The corresponding memory operand, e.g. i512mem for VR512.
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
// FP scalar memory operand for intrinsics - ssmem/sdmem.
Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
// Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
// due to load promotion during legalization
PatFrag LdFrag = !cast<PatFrag>("load" #
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
!if (!eq (Size, 512), "v8i64",
VTName))), VTName));
PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
!if (!eq (Size, 512), "v8i64",
VTName))), VTName));
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
!cast<ComplexPattern>("sse_load_f32"),
!if (!eq (EltTypeName, "f64"),
!cast<ComplexPattern>("sse_load_f64"),
?));
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
// fails to compile, so we choose FloatVT = VT
ValueType FloatVT = !cast<ValueType>(
!if (!eq (!srl(EltSize,5),0),
VTName,
!if (!eq(TypeVariantName, "i"),
"v" # NumElts # "f" # EltSize,
VTName)));
ValueType IntVT = !cast<ValueType>(
!if (!eq (!srl(EltSize,5),0),
VTName,
!if (!eq(TypeVariantName, "f"),
"v" # NumElts # "i" # EltSize,
VTName)));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
// 8-bit compressed displacement tuple/subvector format. This is only
// defined for NumElts <= 8.
CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
!cast<CD8VForm>("CD8VT" # NumElts), ?);
SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
!if (!eq (Size, 256), sub_ymm, ?));
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
SSEPackedInt));
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
// A vector type of the same width with element type i64. This is used to
// create patterns for logic ops.
ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
string ZSuffix = !if (!eq (Size, 128), "Z128",
!if (!eq (Size, 256), "Z256", "Z"));
}
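// Worked example (expository comment, not in the upstream file): for
// v16i32_info = X86VectorVTInfo<16, i32, VR512, "d"> (defined below) the
// derived fields come out as KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
// VTName = "v16i32", EltSize = 32, TypeVariantName = "i", Size = 512,
// MemOp = i512mem, LdFrag = loadv8i64 (integer load promotion),
// FloatVT = v16f32, BroadcastStr = "{1to16}" and ZSuffix = "Z".
// For the scalar f32x_info (NumElts = 1, EltVT.Size = 32) the VTName
// computation instead yields "v4f32".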
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
X86VectorVTInfo i128> {
X86VectorVTInfo info512 = i512;
X86VectorVTInfo info256 = i256;
X86VectorVTInfo info128 = i128;
}
def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
ValueType _vt> {
RegisterClass KRC = _krc;
RegisterClass KRCWM = _krcwm;
ValueType KVT = _vt;
}
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
list<dag> ZeroMaskingPattern,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
let isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern, itin>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = MaskingConstraint;
}
// Zero-masking does not add any restrictions to the operand-commuting
// transformation, so it is OK to use IsCommutable instead of IsKCommutable.
let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
ZeroMaskingPattern,
itin>,
EVEX_KZ;
}
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// Similar to AVX512_maskable_common, but with scalar types.
multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[], [], [],
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0, bit IsKCommutable = 0,
SDNode Select = vselect> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0), Select,
"$src0 = $dst", itin, IsCommutable, IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
X86selects, "$src0 = $dst", itin, IsCommutable>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1),
vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
X86selects, "", NoItinerary, IsCommutable,
IsKCommutable>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
"$src0 = $dst">;
// Instructions with a mask that put the result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
bit IsCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern, NoItinerary>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern, NoItinerary>, EVEX_K;
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
bit IsCommutable = 0> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
[(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(and _.KRCWM:$mask, RHS), IsCommutable>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
AttSrcAsm, IntelSrcAsm, [],[]>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskedRHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0, SDNode Select = vselect> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, MaskedRHS,
_.ImmAllZerosV))],
"$src0 = $dst", itin, IsCommutable>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. It is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
(ins VK16WM:$mask), "",
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
(v16i32 immAllOnesV),
(v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
(bc_v8i64 (v16i32 immAllOnesV)),
(bc_v8i64 (v16i32 immAllZerosV))))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
[(set FR64X:$dst, fpimm0)]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
PatFrag vinsert_insert> {
let ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V;
defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>;
}
}
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vinsert_insert,
SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
let Predicates = p in {
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
(From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
}
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
vinsert128_insert>, EVEX_V256;
defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
vinsert128_insert>, EVEX_V512;
defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert256_insert>, VEX_W, EVEX_V512;
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vinsert128_insert>, VEX_W, EVEX_V256;
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert128_insert>, VEX_W, EVEX_V512;
defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo<16, EltVT32, VR512>,
vinsert256_insert>, EVEX_V512;
}
}
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
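// Worked example (expository comment, not in the upstream file): with
// NAME = VINSERTF the multiclasses above expand to records such as
//   VINSERTF32x4Zrr    vinsertf32x4  (xmm into zmm)
//   VINSERTF32x4Z256rr vinsertf32x4  (xmm into ymm, requires VLX)
//   VINSERTF64x4Zrr    vinsertf64x4  (ymm into zmm, VEX_W)
// plus the rm/rrk/rrkz/rmk/rmkz forms generated by AVX512_maskable.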
// Codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
}
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
multiclass vextract_for_size<int Opcode,
X86VectorVTInfo From, X86VectorVTInfo To,
PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
// vextract_extract). We are interested only in patterns without a mask;
// the intrinsic pattern matches are generated below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
(ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
[(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
AVX512AIi8Base, EVEX;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst|$dst, $src1, $idx}",
[(store (To.VT (vextract_extract:$idx
(From.VT From.RC:$src1), (iPTR imm))),
addr:$dst)]>, EVEX;
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, To.KRCWM:$mask,
From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}",
[]>, EVEX_K, EVEX;
}
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm)),
To.RC:$src0)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrk")
To.RC:$src0, To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm)),
To.ImmAllZerosV)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
}
// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
let Predicates = p in {
def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm))), addr:$dst),
(!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
}
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vextract256_extract,
EXTRACT_get_vextract256_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm NAME # "32x8Z" : vextract_for_size<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
vextract256_extract,
EXTRACT_get_vextract256_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
}
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
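// Worked example (expository comment, not in the upstream file): with
// NAME = VEXTRACTF the multiclasses above expand to records such as
//   VEXTRACTF32x4Zrr    vextractf32x4  (xmm from zmm)
//   VEXTRACTF32x4Z256rr vextractf32x4  (xmm from ymm, requires VLX)
//   VEXTRACTF64x4Zrr    vextractf64x4  (ymm from zmm, VEX_W)
// plus the mr/mrk memory forms defined in vextract_for_size.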
// extract_subvector codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit subvector extract from the first position of a 512-bit vector
// is a subregister copy that needs no instruction.
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
(v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
(v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
(v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
(v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
(v16i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_xmm))>;
// A 256-bit subvector extract from the first position of a 512-bit vector
// is a subregister copy that needs no instruction.
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
(v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
(v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
(v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v16i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm))>;
def : Pat<(v32i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm))>;
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v16i32 (insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v16f32 (insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v32i16 (insert_subvector undef, (v8i16 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v64i8 (insert_subvector undef, (v16i8 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
// A 256-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i64 (insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v8f64 (insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v16i32 (insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v16f32 (insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
}
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
(COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
(COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
let ExeDomain = DestInfo.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
T8PD, EVEX;
defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
(DestInfo.VT (X86VBroadcast
(SrcInfo.ScalarLdFrag addr:$src)))>,
T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
def : Pat<(DestInfo.VT (X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src))))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src)))),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src)))),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mkz)
DestInfo.KRCWM:$mask, addr:$src)>;
}
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
EVEX_V128;
}
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
avx512vl_f64_info>, VEX_W;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
(VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC> {
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"##_.Suffix, "$src", "$src",
(_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable_custom<opc, MRMSrcReg,
(outs _.RC:$dst), (ins GR32:$src),
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
"$src0 = $dst">, T8PD, EVEX;
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#r)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
(!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
(!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
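// Expository note (not in the upstream file): the INSERT_SUBREG dance in
// the patterns above widens the GR8/GR16 source into a fresh GR32 (whose
// upper bits are left undefined via IMPLICIT_DEF) because the instruction
// itself is only defined with a GR32 operand; the broadcast then ignores
// the undefined high bits.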
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
Subreg>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
SrcRC, Subreg>, EVEX_V256;
defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
SrcRC, Subreg>, EVEX_V128;
}
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
}
}
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
(VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
// Provide aliases for broadcasts from the same register class that
// automatically do the extract.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
(EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
EVEX_V512;
// Defined separately to avoid redefinition.
defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
}
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
EVEX_V128;
}
}
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
avx512vl_i64_info, HasAVX512>, VEX_W;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
(_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
(VPBROADCASTQZm addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
(VPBROADCASTQZ128m addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
(VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold: isTypeDesirableForOp justifiably returns false for it.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v16f32_info, v4f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
v8i64_info, v4i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v32i8 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v8i32x_info, v4i32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4f32 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v16i8 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v4i64x_info, v2i64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, NoDQI] in {
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v8i64_info, v2i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v8f64_info, v2f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>,
EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>,
EVEX_V128;
}
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
(VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass KRC> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
}
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
let Predicates = [HasCDI] in
defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
let Predicates = [HasCDI, HasVLX] in {
defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
}
}
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
// The index operand in the pattern should really be an integer type. However,
// if we do that and it happens to come from a bitcast, then it becomes
// difficult to find the bitcast needed to convert the index to the
// destination type for the passthru since it will be folded with the bitcast
// of the index operand.
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, EVEX_4V,
AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermi2X _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
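// Illustrative C-like sketch of the VPERMI2* operation modeled above
// ($src1 holds the indices and is overwritten with the result; N is the
// element count, a placeholder here):
//   for (unsigned i = 0; i < N; ++i) {
//     unsigned id = src1[i] % (2 * N);
//     dst[i] = id < N ? src2[id] : src3[id - N];
//   }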
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
(bitconvert (_.LdFrag addr:$src3)))), 1>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
}
}
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
Idx.info256>, EVEX_V256;
}
}
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w",
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b",
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
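// VPERMT2* performs the same two-table shuffle with the operand roles
// swapped: the indices come from $src2 and the table is $src1:$src3
// (illustrative C-like sketch, placeholders as above):
//   for (unsigned i = 0; i < N; ++i) {
//     unsigned id = src2[i] % (2 * N);
//     dst[i] = id < N ? src1[id] : src3[id - N];
//   }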
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
}
}
}
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let mayLoad = 1, hasSideEffects = 0 in {
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
multiclass blendmask_bw <bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasBWI] in
defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm Z256 : avx512_blendmask <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_blendmask <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
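// Per element, the masked blend encoded above is (illustrative C-like
// sketch):
//   dst[i] = mask[i] ? src2[i] : src1[i];
// Note the defs carry empty patterns ([]); these instructions are matched
// elsewhere during instruction selection rather than through an SDNode here.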
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)>, EVEX_4V;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs VK1:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
let mayLoad = 1 in
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
EVEX_4V, EVEX_B;
}// let isAsmParserOnly = 1, hasSideEffects = 0
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
imm:$cc))],
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
}
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
AVX512XDIi8Base, VEX_W;
}
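// Scalar compare sketch (illustrative): only element 0 participates and a
// single mask bit is produced; the $cc operand selects the predicate.
//   k[0]   = fp_compare(a[0], b[0], predicate);
//   k[...] = 0;                    // upper mask bits cleared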
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> :
avx512_icmp_packed<opc, OpcodeStr, OpNode, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>;
defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
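// Per-element semantics of the packed integer compares defined above
// (illustrative C-like sketch):
//   k[i] = (a[i] == b[i]);         // vpcmpeq*
//   k[i] = (a[i] >  b[i]);         // vpcmpgt*, signed compare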
multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
// VPCMPEQB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ256", [HasBWI, HasVLX]>;
// VPCMPEQW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ", [HasBWI]>;
// VPCMPEQD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
// VPCMPEQQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
// VPCMPGTB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ256", [HasBWI, HasVLX]>;
// VPCMPGTW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ", [HasBWI]>;
// VPCMPGTD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
// VPCMPGTQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
let mayLoad = 1 in
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
let mayLoad = 1 in
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> :
avx512_icmp_cc<opc, Suffix, OpNode, _> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
}
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
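// For the vpcmp/vpcmpu family the $cc immediate selects the predicate;
// per the SDM the 3-bit encoding is (listed here for reference):
//   0: EQ   1: LT   2: LE   3: FALSE   4: NE   5: GE   6: GT   7: TRUE
// i.e. k[i] = cmp(a[i], b[i], cc), with the "u" forms comparing unsigned.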
multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
_.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
// VPCMPB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ256", [HasBWI, HasVLX]>;
// VPCMPW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
"VPCMPWZ", [HasBWI]>;
// VPCMPD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
// VPCMPQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
// VPCMPUB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ256", [HasBWI, HasVLX]>;
// VPCMPUW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
"VPCMPUWZ", [HasBWI]>;
// VPCMPUD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
// VPCMPUQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc), 1>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
let mayLoad = 1 in {
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
}
}
}
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
// Comparison code form (VCMP[EQ/LT/LE/...]).
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))>, EVEX_B;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc">, EVEX_B;
}
}
multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcmp_common<_.info512>,
avx512_vcmp_sae<_.info512>, EVEX_V512;
}
let Predicates = [HasAVX512,HasVLX] in {
defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
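// Packed FP compare sketch (illustrative): each element yields one mask
// bit, with the 5-bit $cc immediate selecting one of the 32 AVX predicates
// (e.g. 0 = EQ_OQ):
//   k[i] = fp_compare(a[i], b[i], predicate);
// The rrib/{sae} forms additionally suppress floating-point exceptions.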
multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds>
: avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
let Predicates = Preds in
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
}
// VCMPPS - f32
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
[HasAVX512]>;
// VCMPPD - f64
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
[HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
// Handle the fpclass instruction: mask = op(reg_scalar, imm)
//                                        op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
}
}
// Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
//                                        fpclass(reg_vec, mem_vec, imm)
//                                        fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string mem, string broadcast>{
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>,EVEX_B;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>,
EVEX_B, EVEX_K;
}
multiclass avx512_vector_fpclass_all<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd,
string broadcast>{
let Predicates = [prd] in {
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512, "{z}",
broadcast>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128, "{x}",
broadcast>, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256, "{y}",
broadcast>, EVEX_V256;
}
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
}
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
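// The vfpclass immediate is a bitmask of categories to test; the result
// bit is set if the element matches any selected category. Per the SDM
// (listed for reference):
//   bit 0: QNaN   bit 1: +0         bit 2: -0              bit 3: +Inf
//   bit 4: -Inf   bit 5: denormal   bit 6: finite negative bit 7: SNaN
//   k[i] = (classify(src[i]) & imm8) != 0;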
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
let hasSideEffects = 0 in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>;
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
string OpcodeStr,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
}
let Predicates = [HasDQI] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasAVX512] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasBWI] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, VEX_W;
}
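// The four forms defined above correspond to (illustrative C-like sketch,
// shown for the 16-bit kmovw):
//   k_dst = k_src;                 // kk: mask -> mask
//   k_dst = *(uint16_t *)mem;      // km: load
//   *(uint16_t *)mem = k_src;      // mk: store
//   k_dst = (uint16_t)gpr;         // kr: GPR -> mask (rk is the reverse)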
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
(COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
(COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
(COPY_TO_REGCLASS VK64:$src, GR64)>;
// Load/store kreg
let Predicates = [HasDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;
def : Pat<(store VK4:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
def : Pat<(store VK1:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
def : Pat<(v2i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store VK1:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK2:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK4:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK8:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
sub_8bit)))>;
def : Pat<(v8i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
def : Pat<(v2i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
let Predicates = [HasBWI] in {
def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
(KMOVDmk addr:$dst, VK32:$src)>;
def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
(KMOVDkm addr:$src)>;
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
(KMOVQkm addr:$src)>;
}
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK1)>;
def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK16)>;
def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK8)>;
}
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
Predicate prd> {
let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>;
}
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI>, VEX, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI>, VEX, PD, VEX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
HasBWI>, VEX, PS, VEX_W;
}
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
// KNL does not support KMOVB, so an 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
def : Pat<(vnot VK4:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
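// Illustrative note: all VK* classes live in the same k-registers, so the
// COPY_TO_REGCLASS nodes above are resolved by the register allocator at no
// cost. On KNL, (vnot VK8:$src) therefore selects to a single instruction,
// e.g. (assuming the v8i1 source is in %k1):
//   knotw %k1, %k2          # upper 8 bits of %k2 are don't-care for v8i1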
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
Predicate prd, bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, bit IsCommutable,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
// With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI
// extension the type is legal and the KxxxB instruction is used.
let Predicates = [NoDQI] in
def : Pat<(VOpNode VK8:$src1, VK8:$src2),
(COPY_TO_REGCLASS
(Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
// All types smaller than 8 bits require conversion anyway
def : Pat<(OpNode VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
(COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
defm : avx512_binop_pat<and, and, KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or, or, KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, xor, KXORWrr>;
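// Illustrative sketch of the KANDN semantics matched by vandn above,
// dst = ~src1 & src2, with hypothetical register assignments %k1 = $src1
// and %k2 = $src2:
//   kandnw %k2, %k1, %k3    # %k3 = ~%k1 & %k2
// The same promotion applies: under NoDQI the VK8/VK4/VK2/VK1 operands are
// widened to VK16 and the word-sized instruction is used.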
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
RegisterClass KRCSrc, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_L;
def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
(!cast<Instruction>(NAME##rr)
(COPY_TO_REGCLASS KRCSrc:$src2, KRC),
(COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
}
}
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
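// Illustrative note: KUNPCKBW writes its second source to the low byte and
// its first source to the high byte, while concat_vectors places $src1 in
// the low lanes; hence the swapped operand order in the pattern above.
// Sketch, assuming %k1 = $src1 and %k2 = $src2:
//   kunpckbw %k1, %k2, %k3  # %k3[7:0] = %k1[7:0], %k3[15:8] = %k2[7:0]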
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
VEX, PD;
defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
VEX, PS;
defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
VEX, PS, VEX_W;
defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
VEX, PD, VEX_W;
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> {
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
}
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
VEX, TAPD, VEX_W;
let Predicates = [HasDQI] in
defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
VEX, TAPD;
let Predicates = [HasBWI] in {
defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
VEX, TAPD, VEX_W;
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
VEX, TAPD;
}
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
}
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
AVX512VLVectorVTInfo _> {
def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
}
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
multiclass avx512_mask_setop_w<PatFrag Val> {
defm W : avx512_mask_setop<VK16, v16i1, Val>;
defm D : avx512_mask_setop<VK32, v32i1, Val>;
defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
// With base AVX-512, an 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
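// Illustrative note: KSET0* and KSET1* are pseudos; after register
// allocation they are expected to expand to a self-XOR and a self-XNOR
// respectively, e.g.
//   kxorw  %k0, %k0, %k0    # all-zeros mask
//   kxnorw %k0, %k0, %k0    # all-ones mask
// so a constant mask never needs a round trip through a GPR.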
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
def : Pat<(v2i1 (extract_subvector (v4i1 VK4:$src), (iPTR 2))),
(v2i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), (i8 2)),
VK2))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 4))),
(v4i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (i8 4)),
VK4))>;
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
(v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
(v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
}
defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag ld_frag, PatFrag mload,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
_.ExeDomain>, EVEX;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src),
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;
let canFoldAsLoad = 1, isReMaterializable = 1,
SchedRW = [WriteLoad] in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
_.ExeDomain>, EVEX;
let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src1),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K;
let SchedRW = [WriteLoad] in
def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src1))),
(_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
}
let SchedRW = [WriteLoad] in
def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
(!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
_.KRCWM:$mask, addr:$ptr)>;
}
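// Illustrative note on the suffix convention above: rr/rm = register/memory
// source, a trailing k = merge-masking (ties $src0 to $dst), kz =
// zero-masking. The trailing mload patterns reuse the masked memory forms
// for masked_load nodes, so e.g. a zero-masked v16f32 load would select to
// something like:
//   vmovups (%rdi), %zmm0 {%k1} {z}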
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
masked_load_aligned512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
masked_load_aligned256>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
masked_load_aligned128>, EVEX_V128;
}
}
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V128;
}
}
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag, PatFrag mstore, string Name> {
let hasSideEffects = 0 in {
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
[], _.ExeDomain>, EVEX, FoldGenData<Name#rr>;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K, FoldGenData<Name#rrk>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData<Name#rrkz>;
}
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K;
def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
(!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
}
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
masked_store_unaligned, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
masked_store_unaligned, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
masked_store_unaligned, Name#Z128>, EVEX_V128;
}
}
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
masked_store_aligned512, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
masked_store_aligned256, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
masked_store_aligned128, Name#Z128>, EVEX_V128;
}
}
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
HasAVX512, "VMOVAPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
HasAVX512, "VMOVAPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
"VMOVUPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
"VMOVUPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, "VMOVDQA32">,
PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, "VMOVDQA64">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI, "VMOVDQU8">,
XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI, "VMOVDQU16">,
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512, "VMOVDQU32">,
XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512, "VMOVDQU64">,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load into or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>;
}
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>;
}
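// Illustrative sketch of the expansion, assuming %xmm3 must be spilled while
// VLX is unavailable: the pseudo's operand is widened to its containing ZMM
// register and the ZMM form is emitted, e.g.
//   vmovaps %zmm3, (%rsp)   # writes 64 bytes; the spill slot is sized for it
// The pseudos exist because the EVEX-encoded 128/256-bit forms require VLX.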
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
(bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16f32
(VMOVAPSZrrk
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16i32
(VMOVDQA32Zrrk
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
}
let Predicates = [HasVLX, NoBWI] in {
// 128-bit load/store without BWI.
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
// 256-bit load/store without BWI.
def : Pat<(alignedstore256 (v16i16 VR256X:$src), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore256 (v32i8 VR256X:$src), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
}
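// Illustrative note: an unmasked full-width load or store is element-size
// agnostic, so the dword forms above move v8i16/v16i8 data bit-for-bit;
// BWI only becomes necessary once per-byte or per-word masking is involved.
//   vmovdqu32 %xmm0, (%rdi) # stores 16 bytes regardless of element type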
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of the lower 128 bits of a
// 256-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore (v2f64 (extract_subvector
(v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4f32 (extract_subvector
(v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v2i64 (extract_subvector
(v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4i32 (extract_subvector
(v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v8i16 (extract_subvector
(v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v16i8 (extract_subvector
(v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v4f32 (extract_subvector
(v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v2i64 (extract_subvector
(v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v4i32 (extract_subvector
(v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
// Special patterns for storing subvector extracts of the lower 128 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore (v2f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v2i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v8i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v16i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v4f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v2i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v4i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
// Special patterns for storing subvector extracts of the lower 256 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore256 (v4f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v8f32 (extract_subvector
- (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v8f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v4i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v8i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v16i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v32i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v4f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v8f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v4i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v8i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v16i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v32i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
}
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, EVEX, VEX_W;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))],
IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
(ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
addr:$dst)], IIC_SSE_MOVDQ>,
EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
EVEX, VEX_W;
} // ExeDomain = SSEPackedInt
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
} // ExeDomain = SSEPackedInt
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode,
X86VectorVTInfo _> {
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V;
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))),
_.ImmAllZerosV)))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))),
(_.VT _.RC:$src0))))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
_.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
let mayLoad = 1, hasSideEffects = 0 in {
let Constraints = "$src0 = $dst" in
def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|",
"$dst {${mask}}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
}
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
EVEX;
let mayStore = 1, hasSideEffects = 0 in
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
}
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT _.FRC:$src2))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
(COPY_TO_REGCLASS _.FRC:$src2, _.RC),
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
(_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT ZeroFP))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
(_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
}
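// Illustrative sketch of what the patterns above match, names for exposition
// only: a scalar select on the low bit of a GPR mask merged into a vector,
//   result = { (mask & 1) ? src1 : src2, src0[1], src0[2], src0[3] }
// which maps onto a merge-masked (or zero-masked) vmovss, e.g.
//   kmovw  %eax, %k1
//   vmovss %xmm1, %xmm0, %xmm2 {%k1}  # low lane: %xmm1 if set, else %xmm2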
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
}
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
}
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrr">;
let Constraints = "$src0 = $dst" in
def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrk">;
def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrkz">;
def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
FoldGenData<"VMOVSDZrr">;
let Constraints = "$src0 = $dst" in
def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrk">;
def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.KRCWM:$mask, VR128X:$src1,
FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrkz">;
}
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended: zero a VR128X register, then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
}
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// Represent the same patterns above but in the form they appear for
// 512-bit types
def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v16f32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
FR32X:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
// Shuffle with VMOVSS
def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
(VMOVSSZrr (v4i32 VR128X:$src1),
(COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
(VMOVSSZrr (v4f32 VR128X:$src1),
(COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit variants
def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
sub_xmm)>;
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
}
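// Illustrative note: the X86Movss/X86Movsd shuffles take the low element
// from the second operand and the remaining elements from the first, i.e.
// for v4f32:
//   result = { src2[0], src1[1], src1[2], src1[3] }
// which is exactly what register-form vmovss computes, hence the direct
// mappings above.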
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(VMOV64toPQIZrr GR64:$src)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v16i32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
let SchedRW = [WriteLoad] in {
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag = alignednontemporalstore,
InstrItinClass itin = IIC_SSE_MOVNT> {
let SchedRW = [WriteStore], AddedComplexity = 400 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)],
_.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
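// Each defm above expands to one store per vector length, e.g. VMOVNTDQZmr,
// VMOVNTDQZ256mr and VMOVNTDQZ128mr ("mr" = memory destination, register
// source); the patterns below map the remaining element types onto these
// integer forms.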
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(v8f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
}
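// There is no dedicated non-temporal FP load instruction, so the aligned
// non-temporal FP loads above (and their VLX-sized counterparts below) are
// also selected to the integer-domain VMOVNTDQA forms.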
let Predicates = [HasVLX], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(v4f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(v2f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> :
avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
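// Operand-form suffix convention used throughout this file: "rr" is
// register-register, "rm" is register-memory, and "rmb" is a broadcast
// memory operand (EVEX.b), where a single scalar element is loaded and
// splat across the vector.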
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, OpndItins itins,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, OpndItins itins,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
itins, prd, IsCommutable>,
VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
}
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
}
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
IsCommutable>;
defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
IsCommutable>;
defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode,
OpndItins itins, bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
itins, HasAVX512, IsCommutable>,
avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
itins, HasBWI, IsCommutable>;
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Brdct.BroadcastStr##", $src1",
"$src1, ${src2}"##_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2)))))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SSE_INTALU_ITINS_P, 1>;
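// As an illustration, the VPADD defm above concatenates the element-size and
// vector-length suffixes, yielding definitions such as VPADDBZ128rr,
// VPADDWZ256rm and VPADDQZrmb.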
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SSE_INTALU_ITINS_P, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
SSE_INTALU_ITINS_P, HasBWI, 1>;
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info512, _DstVTInfo.info512,
v8i64_info, IsCommutable>,
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasVLX, prd] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info256, _DstVTInfo.info256,
v4i64x_info, IsCommutable>,
EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info128, _DstVTInfo.info128,
v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
}
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuludq, HasAVX512, 1>;
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
avx512vl_i8_info, avx512vl_i8_info,
X86multishift, HasVBMI, 0>, T8PD;
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Src.BroadcastStr##", $src1",
"$src1, ${src2}"##_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
NoItinerary, IsCommutable>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2))))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info>, EVEX_V128;
}
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
v64i8_info>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
v32i8x_info>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
v16i8x_info>, EVEX_V128;
}
}
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo _Src,
AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
_Dst.info512, IsCommutable>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
_Dst.info256, IsCommutable>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
_Dst.info128, IsCommutable>, EVEX_V128;
}
}
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
// PMULLQ: use the 512-bit version to implement the 128/256-bit forms when
// VLX is unavailable (NoVLX).
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
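// The INSERT_SUBREG/EXTRACT_SUBREG idiom above widens the operands into a
// ZMM register (the upper lanes are IMPLICIT_DEF garbage), executes the
// 512-bit instruction, and extracts the low subregister; this is safe
// because the multiply operates lane-wise.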
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable = 0> {
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
(bitconvert (_.VT _.RC:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
_.RC:$src2)))),
IIC_SSE_BIT_P_RR, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
(bitconvert (_.LdFrag addr:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))))),
IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V;
}
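// Note: the multiclass carries two patterns because the logic op itself is
// performed on the bitconverted i64 vector type, while the masked forms need
// the result expressed in the instruction's native type for the vselect;
// hence the second, bitconverted-back pattern.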
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable = 0> :
avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.i64VT (OpNode _.RC:$src1,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))))),
IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
bit IsCommutable = 0> {
let Predicates = [HasAVX512] in
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
IsCommutable>,
VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, bit IsCommutable = 0> {
defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
}
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, OpndItins itins,
bit IsCommutable> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
(i32 FROUND_CURRENT))),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
}
}
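// The "_Int" variants above operate on full vectors (preserving the upper
// elements of $src1) and serve the intrinsic forms, while the isCodeGenOnly
// FRC-register versions pattern-match plain scalar arithmetic.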
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
let ExeDomain = _.ExeDomain in
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$rc)), itins.rr, IsCommutable>,
EVEX_B, EVEX_RC;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
OpndItins itins, bit IsCommutable> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2)),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2)),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
}
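// Rounding-control summary for the scalar FP multiclasses: FROUND_CURRENT
// uses the rounding mode currently in MXCSR, the "rrb"/EVEX_RC form encodes
// a static rounding mode in $rc, and the {sae} form merely suppresses
// exceptions (used by min/max, which do not round).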
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode,
SizeItins itins, bit IsCommutable> {
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
itins.s, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
itins.d, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode, SDNode SaeNode,
SizeItins itins, bit IsCommutable> {
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
VecNode, SaeNode, itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
VecNode, SaeNode, itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
SSE_ALU_ITINS_S, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
// the commutable X86fminc and X86fmaxc nodes instead of X86fmin and X86fmax.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = 1;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
IsCommutable>, EVEX_4V;
let mayLoad = 1 in {
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
EVEX_4V;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
itins.rm>, EVEX_4V, EVEX_B;
}
}
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC;
}
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
EVEX_4V, EVEX_B;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, SizeItins itins,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
itins.s, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if the AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
itins.s, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
itins.s, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
SSE_ALU_ITINS_P, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
SSE_MUL_ITINS_P, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
SSE_ALU_ITINS_P, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
// These patterns catch floating-point selects over bitcasted integer
// logical ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
// Masked register-register logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
addr:$src2)>;
// Register-broadcast logical operations.
def : Pat<(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
}
}
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
let Predicates = [HasVLX,HasDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VANDPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VORPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VXORPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VANDNPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VANDPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VORPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VXORPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VANDNPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
}
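// The COPY_TO_REGCLASS trick above runs a 128-bit packed logical op on a
// scalar value; the contents of the upper lanes are irrelevant since only
// element 0 is copied back to the scalar register class.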
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
EVEX_4V, EVEX_B;
}
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1,
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
EVEX_4V,EVEX_CD8<32, CD8VT1>;
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if the AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
EVEX_4V;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))>,
EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// unavailable (NoVLX).
multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Suffix> {
def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src1, _.SubRegIdx),
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src2, _.SubRegIdx)),
_.KRC))>;
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _, string Suffix> {
let Predicates = [HasAVX512] in
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
}
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
avx512vl_i32_info, "D">;
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
avx512vl_i64_info, "Q">, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
}
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
SDNode OpNode> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode>,
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS;
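// Semantics: vptestm sets mask bit i when (src1[i] & src2[i]) != 0, and
// vptestnm sets it when the AND is zero; the two share opcode bytes and are
// distinguished by the T8PD vs T8XS prefix.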
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rr>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
}
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>, EVEX_B;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 (the shift count) is always a 128-bit vector.
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_w<bits<8> opcw,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v32i16_info>, EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v16i16x_info>, EVEX_V256;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v8i16x_info>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
// Use the 512-bit VPSRA/VPSRAI forms to implement v2i64/v4i64 when VLX is
// unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
VR128X:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
}
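// VPSRAQ only exists as a 512-bit instruction without VLX (64-bit
// arithmetic shifts are new in AVX-512), hence the widening patterns above.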
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
}
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
avx512vl_i64_info>, VEX_W;
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// unavailable (NoVLX).
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
SDNode OpNode, list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
EVEX_V512, VEX_W;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
EVEX_V128, VEX_W;
}
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
avx512_var_shift_w<0x10, "vpsrlvw", srl>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
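// Note: unlike the uniform shifts above (where src2 is a single 128-bit
// count), these variable forms take a per-element shift or rotate amount.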
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
// Special handling for the VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> :
avx512_var_shift_int_lowering<InstrStr, _, p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
// Use the 512-bit VPROLV/VPROL forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPRORV/VPROR forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
}
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/B/D/Q
//===-------------------------------------------------------------------===//
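// Note: the VPERMD/Q/PS/PD multiclass below only instantiates 512- and
// 256-bit forms; there is no 128-bit variant, presumably because a
// cross-lane element permute is not provided at XMM width.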
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
Predicate prd, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [prd] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512 ;
let Predicates = [HasVLX, prd] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256 ;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128 ;
}
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
avx512vl_i8_info>;
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
avx512vl_f64_info>, VEX_W;
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
X86VPermi, avx512vl_i64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2)))>,
T8PD, EVEX_4V;
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
let Predicates = [HasAVX512] in {
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
Ctrl.info128>, EVEX_V128;
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
Ctrl.info256>, EVEX_V256;
}
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
X86PShufd, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base;
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
}
}
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
IIC_SSE_MOV_LH>, EVEX_4V;
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
IIC_SSE_MOV_LH>, EVEX_4V;
let Predicates = [HasAVX512] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
(VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
(VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
// MOVHLPS patterns
def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
(VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// VMOVHPS/PD and VMOVLPS/PD Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
IIC_SSE_MOV_LH>, EVEX_4V;
}
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
let Predicates = [HasAVX512] in {
// VMOVHPS patterns
def : Pat<(X86Movlhps VR128X:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128X:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVHPD patterns
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPS patterns
def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPD patterns
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1,
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
(bc_v2f64 (v4f32 VR128X:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128X:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
// VMOVLPS patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
def : Pat<(store (v4i32 (X86Movlps
(bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
// VMOVLPD patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
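// A note on the 132/213/231 forms (where $src1 is tied to $dst): the digits
// give the operand order of the computation a * b + c, e.g.
//   vfmadd213ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm2 * zmm1 + zmm3
//   vfmadd231ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm2 * zmm3 + zmm1
//   vfmadd132ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm1 * zmm3 + zmm2
// which matches the (OpNode ...) operand orders in the patterns below.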
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
AVX512FMA3Base, EVEX_B;
}
// Additional pattern for folding broadcast nodes in other orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B;
}
// Additional patterns for folding broadcast nodes in other orders.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mb) _.RC:$src1,
_.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src3), _.RC:$src2)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B;
}
// Additional patterns for folding broadcast nodes in other orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
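// The *_Int forms below operate on the whole 128-bit register and implement
// the intrinsics (preserving the pass-through upper elements), while the
// isCodeGenOnly FRC forms are what plain scalar code generation selects.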
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
dag RHS_r, dag RHS_m > {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
let isCodeGenOnly = 1, isCommutable = 1 in {
def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_r]>;
def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_m]>;
}// isCodeGenOnly = 1
}
}// Constraints = "$src1 = $dst"
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
// Operands for the intrinsic are in 123 order to preserve passthrough
// semantics.
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
_.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3))))>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
(_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
_.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
_.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
}
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
X86FmaddRnds3>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
X86FmsubRnds3>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
X86FnmaddRnds1, X86FnmaddRnds3>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
X86FnmsubRnds1, X86FnmsubRnds3>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
//===----------------------------------------------------------------------===//
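// Per 64-bit lane, the low 52 bits of $src2 and $src3 are multiplied as
// unsigned integers to form a 104-bit product; VPMADD52LUQ adds the low 52
// bits of that product to the 64-bit accumulator in $src1/$dst, while
// VPMADD52HUQ adds the high 52 bits.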
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
}
} // Constraints = "$src1 = $dst"
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasIFMA] in {
defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasIFMA] in {
defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
avx512vl_i64_info>, VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//
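// The rr/rm forms below carry no selection patterns (hasSideEffects = 0);
// scalar sint_to_fp/uint_to_fp is matched by the explicit Pat<> definitions
// further down, and the intrinsics go through the isCodeGenOnly *_Int forms.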
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, X86MemOperand x86memop,
PatFrag ld_frag, string asm> {
let hasSideEffects = 0 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 FROUND_CURRENT)))]>, EVEX_4V;
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, x86memop:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>, EVEX_4V;
}//isCodeGenOnly = 1
}
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, string asm> {
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
}
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, X86MemOperand x86memop,
PatFrag ld_frag, string asm> {
defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
VEX_LIG;
}
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32,
"cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp GR32:$src)),
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
X86VectorVTInfo DstVT, SDNode OpNode, string asm> {
let Predicates = [HasAVX512] in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC;
def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
} // Predicates = [HasAVX512]
}
// Convert float/double to signed/unsigned int 32/64
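// The unsigned variants (vcvtss2usi/vcvtsd2usi, opcode 0x79) are EVEX-only
// additions; the signed ones share opcode 0x2D with their SSE/AVX
// counterparts.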
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
X86cvts2si, "cvtss2si">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
X86cvts2si, "cvtss2si">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
X86cvts2usi, "cvtss2usi">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
X86cvts2usi, "cvtss2usi">, XS, VEX_W,
EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
X86cvts2si, "cvtsd2si">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
X86cvts2si, "cvtsd2si">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
X86cvts2usi, "cvtsd2usi">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
X86cvts2usi, "cvtsd2usi">, XD, VEX_W,
EVEX_CD8<64, CD8VT1>;
// The SSE versions of these instructions are disabled when AVX512 is
// available. Therefore, the SSE intrinsics are mapped to the AVX512
// instructions.
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
(VCVTSS2SIZrr VR128X:$src)>;
def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
(VCVTSS2SIZrm sse_load_f32:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
(VCVTSS2SI64Zrr VR128X:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
(VCVTSS2SI64Zrm sse_load_f32:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
(VCVTSD2SIZrr VR128X:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
(VCVTSD2SIZrm sse_load_f64:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
(VCVTSD2SI64Zrr VR128X:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
(VCVTSD2SI64Zrm sse_load_f64:$src)>;
} // HasAVX512
let Predicates = [HasAVX512] in {
def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
(VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
(VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
(VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
(VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
(VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
(VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
(VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
(VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
(VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
(VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
} // Predicates = [HasAVX512]
// Patterns used to match the vcvtsi2s{s,d} intrinsic sequences from clang,
// which produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
SDNode OpNodeRnd, string aliasStr>{
let Predicates = [HasAVX512] in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
let hasSideEffects = 0 in
def rb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[]>, EVEX, EVEX_B;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
EVEX;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
(!cast<Instruction>(NAME # "rb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
_SrcRC.ScalarMemOp:$src), 0>;
let isCodeGenOnly = 1 in {
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
(i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
def rb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
(i32 FROUND_NO_EXC)))]>,
EVEX,VEX_LIG , EVEX_B;
let mayLoad = 1, hasSideEffects = 0 in
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX, VEX_LIG;
} // isCodeGenOnly = 1
} //HasAVX512
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
fp_to_sint, X86cvtts2IntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
fp_to_sint, X86cvtts2IntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
(VCVTTSS2SIZrr_Int VR128X:$src)>;
def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
(VCVTTSS2SIZrm_Int ssmem:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
(VCVTTSS2SI64Zrr_Int VR128X:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
(VCVTTSS2SI64Zrm_Int ssmem:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
(VCVTTSD2SIZrr_Int VR128X:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
(VCVTTSD2SIZrm_Int sdmem:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
(VCVTTSD2SI64Zrr_Int VR128X:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
(VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
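// Note the asymmetry below: the narrowing sd2ss direction can round and so
// gets a rounding-control (RC) variant, while the widening ss2sd direction
// is exact and only gets an SAE variant.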
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_NO_EXC)))>,
EVEX_4V, VEX_LIG, EVEX_B;
}
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
}
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
EVEX_CD8<32, CD8VT1>, XS;
}
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
X86froundRnd, f64x_info, f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
(VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
(VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//
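// Each conversion below comes in the usual rr/rm/rmb triad: full-vector
// register, full-vector memory, and an EVEX_B broadcast form that splats a
// single scalar element loaded from memory.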
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
(bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.ScalarMemOp:$src), OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
))>, EVEX, EVEX_B;
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
(i32 FROUND_NO_EXC)))>,
EVEX, EVEX_B;
}
// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
"$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
EVEX, EVEX_B, EVEX_RC;
}
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fpextend>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
X86vfpext, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend>,
EVEX_V256;
}
}
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
X86vfpround, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
PS, EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
let Predicates = [HasVLX] in {
let AddedComplexity = 15 in
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
(VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(v2f64 (extloadv2f32 addr:$src)),
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128> {
// No rounding in this op
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
OpNode128, "{1to2}", "", i64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
OpNodeRnd>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
OpNode128, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parcer. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
"{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
"{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// We need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in the asm parser. They have the same
// dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
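// Instantiations of the conversion multiclasses above. Roughly speaking, each
// defm below expands to a Z (512-bit) rr/rm pair plus, under VLX, Z128/Z256
// variants with the "x"/"y" assembly aliases; e.g. "vcvttpd2dqx (%rax), %xmm0"
// selects the VCVTTPD2DQZ128rm form.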
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP>,
XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
X86VSintToFpRnd>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
X86cvttp2siRnd>,
XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
X86cvttp2siRnd>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
X86cvttp2ui, X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP>,
XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
X86VUintToFpRnd>, XD,
EVEX_CD8<32, CD8VF>;
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
X86cvtp2IntRnd>, XD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
X86cvtp2UIntRnd>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
X86cvtp2UIntRnd>, VEX_W,
PS, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
X86cvtp2IntRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
X86cvttp2siRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
X86cvttp2siRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
X86cvttp2uiRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
X86cvttp2uiRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
X86VSintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
X86VUintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
X86VSintToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
X86VUintToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
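// Without VLX, the 128/256-bit forms of these conversions are unavailable, so
// widen the source into a ZMM register (INSERT_SUBREG into an IMPLICIT_DEF),
// run the 512-bit instruction, and extract the low subregister of the result.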
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src, sub_xmm)))), sub_xmm)>;
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
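// The 128-bit PD->DQ conversions write only the low 64 bits of the result and
// zero the rest, so an explicit vzmovl of the result is redundant and can be
// folded into the conversion itself.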
let Predicates = [HasAVX512, HasVLX] in {
let AddedComplexity = 15 in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
(VCVTPD2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
(VCVTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))),
(VCVTTPD2UDQZ128rr VR128X:$src)>;
}
}
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
}
let Predicates = [HasDQI, HasVLX] in {
let AddedComplexity = 15 in {
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
(VCVTQQ2PSZ128rr VR128X:$src)>;
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
(VCVTUQQ2PSZ128rr VR128X:$src)>;
}
}
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag> {
defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT _src.RC:$src),
(i32 FROUND_CURRENT))>, T8PD;
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
"vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
(i32 FROUND_CURRENT))>, T8PD;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "{sae}, $src", "$src, {sae}",
(X86cvtph2ps (_src.VT _src.RC:$src),
(i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
}
let Predicates = [HasAVX512] in {
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop> {
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)),
NoItinerary, 0, 0, X86select>, AVX512AIi8Base;
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2))),
addr:$dst)]>;
let hasSideEffects = 0, mayStore = 1 in
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K;
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
let hasSideEffects = 0 in
defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
[]>, EVEX_B, AVX512AIi8Base;
}
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
// Use MXCSR.RC for rounding instead of explicitly specifying the default
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
// configurations we support (the default). However, falling back to MXCSR is
// more consistent with other instructions, which are always controlled by it.
// It's encoded as 0b100.
def : Pat<(fp_to_f16 FR32X:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
(COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
(COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
(VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
}
// Patterns for matching float to half-float conversion when AVX512 is supported
// but F16C isn't. In that case we have to use 512-bit vectors.
let Predicates = [HasAVX512, NoVLX, NoF16C] in {
def : Pat<(fp_to_f16 FR32X:$src),
(i16 (EXTRACT_SUBREG
(VMOVPDI2DIZrr
(v8i16 (EXTRACT_SUBREG
(VCVTPS2PHZrr
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4), sub_xmm))), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
(v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
sub_xmm)), sub_xmm)), FR32X))>;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4)), sub_xmm)), FR32X))>;
}
// Unordered/Ordered scalar fp compare with SAE (suppress-all-exceptions) and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
[], IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
Sched<[WriteFAdd]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
"ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
"comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
"comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
}
}
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, T8PD, EVEX_B;
}
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
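// Per the Intel SDM, the rcp14/rsqrt14 approximations are accurate to a
// maximum relative error of 2^-14, hence the "14" in the mnemonics.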
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_CURRENT))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
EVEX_CD8<32, CD8VT1>;
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}
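// The ER (*28*) variants refine the approximation to a maximum relative error
// of 2^-28; they are only available with AVX512ER (HasERI).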
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
EVEX, EVEX_B, EVEX_RC;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))))>, EVEX;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, EVEX_B;
}
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
v8f64_info>,
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v4f32x_info>,
EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v8f32x_info>,
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd> {
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 FROUND_CURRENT))>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$rc))>,
EVEX_B, EVEX_RC;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
}
}
def : Pat<(_.EltVT (OpNode _.FRC:$src)),
(!cast<Instruction>(NAME#SUFF#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
let Predicates = [HasAVX512] in {
def : Pat<(f32 (X86frsqrt FR32X:$src)),
(COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
(COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
def : Pat<(f32 (X86frcp FR32X:$src)),
(COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
def : Pat<(f32 (X86frcp (load addr:$src))),
(COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
}
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
let Predicates = [HasAVX512] in {
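// The rndscale immediate follows the SSE4.1 round* imm8 encoding: bits 1:0
// select the rounding mode (0=nearest, 1=down, 2=up, 3=truncate), bit 2
// selects MXCSR.RC instead, and bit 3 suppresses the precision (inexact)
// exception. Hence 0x9=floor, 0xA=ceil, 0xB=trunc, 0xC=nearbyint (MXCSR,
// inexact suppressed), and 0x4=rint (MXCSR, inexact reported).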
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x9))), _.FRC)>;
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xb))), _.FRC)>;
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xc))), _.FRC)>;
}
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
X86MemOperand x86memop> {
let ExeDomain = DestInfo.ExeDomain in
defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
EVEX, T8XS;
// For intrinsic pattern matching.
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
undef)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
}//mayStore = 1, mayLoad = 1, hasSideEffects = 0
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo,
PatFrag truncFrag, PatFrag mtruncFrag > {
def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
addr:$dst, SrcInfo.RC:$src)>;
def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
(SrcInfo.VT SrcInfo.RC:$src)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
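// Three flavors of narrowing:
// vpmov*   - plain truncation (drop the high bits),
// vpmovs*  - truncate with signed saturation,
// vpmovus* - truncate with unsigned saturation.
// e.g. vpmovusqb narrows qwords to bytes, clamping each to [0, 255].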
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
truncstorevi16, masked_truncstorevi16>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
truncstorevi32, masked_truncstorevi32>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
truncstore_us_vi32, masked_truncstore_us_vi32>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
truncstorevi16, masked_truncstorevi16>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
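// As with the conversions above, emulate the narrow truncates on targets
// without VLX by widening to the 512-bit instruction and extracting the low
// subregister.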
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
(v4i32 (EXTRACT_SUBREG
(v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
let ExeDomain = DestInfo.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins x86memop:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (LdFrag addr:$src))>,
EVEX;
}
}
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasBWI] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v16i8x_info, i32mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v8i16x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
// EXTLOAD patterns, implemented using vpmovzx: an any-extending load may
// legally be lowered as a zero-extending one.
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
X86VectorVTInfo From, PatFrag LdFrag> {
def : Pat<(To.VT (LdFrag addr:$src)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>;
def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0,
To.KRC:$mask, addr:$src)>;
def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src),
To.ImmAllZerosV)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask,
addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>;
}
let Predicates = [HasBWI] in {
defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>;
defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>;
}
let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
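// Fold loads into the sign/zero-extend memory forms. The scalar_to_vector,
// vzmovl, and vzload wrappers below are the different DAG shapes a narrow
// load can take; they all select the same *rm instruction.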
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasBWI] in {
def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
}
}
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
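// A gather merges into its destination: $src1 is tied to $dst, and elements
// whose mask bit is clear keep their previous value. The mask register is
// both consumed and written back ($mask = $mask_wb); hardware clears each
// bit as the corresponding element completes.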
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
ExeDomain = _.ExeDomain in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
(ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set _.RC:$dst, _.KRCWM:$mask_wb,
(GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
vectoraddr:$src2))]>, EVEX, EVEX_K,
EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
mgatherv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vy256xmem, mgatherv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vy128xmem, mgatherv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx64xmem, X86mgatherv2i64>, EVEX_V128;
}
}
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
(ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
mscatterv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vy256xmem, mscatterv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vy128xmem, mscatterv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mscatterv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mscatterv2i64>, EVEX_V128;
}
}
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
let Predicates = [HasPFI], hasSideEffects = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
[]>, EVEX, EVEX_K;
}
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
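// The PF0 forms prefetch with a T0 hint (toward L1) and the PF1 forms with a
// T1 hint (toward L2); only elements whose mask bit is set are prefetched.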
// Helper fragments to match sext vXi1 to vXiY.
def v64i1sextv64i8 : PatLeaf<(v64i8
(X86vsext
(v64i1 (X86pcmpgtm
(bc_v64i8 (v16i32 immAllZerosV)),
VR512:$src))))>;
def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>;
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
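// For i16/i32/i64 elements, an arithmetic shift right by (element size - 1)
// replicates the sign bit across the element, which is exactly a sign
// extension of the per-element i1 value; i8 has no vector shift, so a signed
// compare against zero is used instead.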
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// not available (NoVLX).
multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
X86VectorVTInfo _> {
def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
(X86Info.VT (EXTRACT_SUBREG
(_.VT (!cast<Instruction>(NAME#"Zrr")
(_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
X86Info.SubRegIdx))>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
}
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
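// vpmovm2* replicates mask bit i across all bits of element i, producing
// all-ones or all-zeros elements.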
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX;
}
// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// not available (NoVLX).
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _> {
def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
EVEX_V256;
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
}
}
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
avx512vl_i64_info, HasDQI>, VEX_W;
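// vpmov*2m is the inverse direction: mask bit i is set from the most
// significant (sign) bit of element i.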
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
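// Compress packs the mask-selected elements into contiguous low positions of
// the destination (or contiguous memory); expand is the inverse, filling the
// mask-selected lanes from contiguous source elements.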
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1, hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
(_.VT _.RC:$src)),
(!cast<Instruction>(NAME#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
EVEX, VEX_W;
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
(_.VT _.RC:$src0))),
(!cast<Instruction>(NAME#_.ZSuffix##rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec, imm)
//   reg_vec1 = op(mem_vec, imm)
//   reg_vec1 = op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
// Handle instructions of the form reg_vec1 = op(reg_vec2, imm), {sae}.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128;
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
(i8 imm:$src3)))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B;
}
// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_scalar, imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
}
}
// Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
// Handle scalar instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128;
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256;
}
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
let Predicates = [HasBWI] in {
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
}
let Predicates = [HasBWI, HasVLX] in {
defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
}
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode>{
let Predicates = [HasAVX512] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
}
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, prd>, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode = X86Shuf128>{
let Predicates = [HasAVX512] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
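// VRNDSCALE imm8 encoding: imm[1:0] is the rounding mode (00 nearest, 01
// down, 10 up, 11 truncate), imm[2] = 1 selects MXCSR.RC instead, imm[3]
// suppresses precision exceptions, and imm[7:4] is the scale M (round to a
// multiple of 2^-M). Hence 0x9 = floor, 0xA = ceil, 0xB = trunc, 0xC =
// nearbyint (current mode, no inexact) and 0x4 = rint (current mode, may
// raise inexact), as used in the patterns below.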
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasAVX512] in {
// Provide a fallback in case the load node used in the broadcast patterns
// above has additional users, which would prevent those patterns from being
// selected.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
(VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
}
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
}
defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
multiclass avx512_vpalignr_lowering<X86VectorVTInfo _ , list<Predicate> p>{
let Predicates = p in
def NAME#_.VTName#rri:
Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
(!cast<Instruction>(NAME#_.ZSuffix#rri)
_.RC:$src1, _.RC:$src2, imm:$imm)>;
}
multiclass avx512_vpalignr_lowering_common<AVX512VLVectorVTInfo _>:
avx512_vpalignr_lowering<_.info512, [HasBWI]>,
avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
avx512vl_i8_info, avx512vl_i8_info>,
avx512_vpalignr_lowering_common<avx512vl_i16_info>,
avx512_vpalignr_lowering_common<avx512vl_i32_info>,
avx512_vpalignr_lowering_common<avx512vl_f32_info>,
avx512_vpalignr_lowering_common<avx512vl_i64_info>,
avx512_vpalignr_lowering_common<avx512vl_f64_info>,
EVEX_CD8<8, CD8VF>;
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> :
avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
(_.VT (OpNode (X86VBroadcast
(_.ScalarLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, Predicate prd> {
defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
prd>, VEX_W;
defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, Predicate prd> {
defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode> {
defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
HasAVX512>,
avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
HasBWI>;
}
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
// VPABS: Use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (abs VR128X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
}
defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
// VPLZCNT: Use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available (NoVLX).
let Predicates = [HasCDI, NoVLX] in {
def : Pat<(v4i64 (ctlz VR256X:$src)),
(EXTRACT_SUBREG
(VPLZCNTQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (ctlz VR128X:$src)),
(EXTRACT_SUBREG
(VPLZCNTQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
def : Pat<(v8i32 (ctlz VR256X:$src)),
(EXTRACT_SUBREG
(VPLZCNTDZrr
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v4i32 (ctlz VR128X:$src)),
(EXTRACT_SUBREG
(VPLZCNTDZrr
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//
multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
let Predicates = [HasVPOPCNTDQ] in
defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
}
// Use the 512-bit version to implement the 128/256-bit variants.
multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
(EXTRACT_SUBREG
(!cast<Instruction>(NAME # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info256.RC:$src1,
_.info256.SubRegIdx)),
_.info256.SubRegIdx)>;
def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
(EXTRACT_SUBREG
(!cast<Instruction>(NAME # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info128.RC:$src1,
_.info128.SubRegIdx)),
_.info128.SubRegIdx)>;
}
}
defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
HasAVX512>, XS;
}
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
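// The 128-bit form duplicates the low double and reads only 64 bits from
// memory, so it uses a scalar f64 load (and CD8VH disp8 scaling) instead of
// a full vector load.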
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src)))))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>;
}
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
avx512vl_f64_info>, XD, VEX_W;
}
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
let Predicates = [HasVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
SSE_ALU_ITINS_S>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
SSE_ALU_ITINS_S>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
SSE_INTALU_ITINS_P, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
def mr : AVX512Ii8<opc, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_.EltVT (trunc (assertzext (OpNode (_.VT _.RC:$src1),
imm:$src2)))),
addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
}
}
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, PD;
let hasSideEffects = 0 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX, TAPD, FoldGenData<NAME#rr>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
}
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
}
}
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag> {
def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
}
}
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GRC:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
EVEX_4V, TAPD;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
_.ScalarLdFrag>, TAPD;
}
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
extloadi8>, TAPD;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
extloadi16>, PD;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
AVX512AIi8Base, EVEX_4V;
}
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
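// These shift each 128-bit lane independently by a byte count, matching the
// legacy SSE pslldq/psrldq semantics within every lane.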
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, Predicate prd>{
let Predicates = [prd] in
defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v32i8x_info>, EVEX_V256;
defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v16i8x_info>, EVEX_V128;
}
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86VectorVTInfo _dst,
X86VectorVTInfo _src>{
def rr : AVX512BI<opc, MRMSrcReg,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT _src.RC:$src2))))]>;
def rm : AVX512BI<opc, MRMSrcMem,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
v32i8x_info>, EVEX_V256;
defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
v16i8x_info>, EVEX_V128;
}
}
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
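// psadbw sums the absolute differences of the eight unsigned byte pairs in
// each quadword and zero-extends the 16-bit sum into that quadword, hence
// the i64 destination and i8 source vector types above.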
// Transforms that swizzle a VPTERNLOG immediate to enable matching when the
// memory operand is not in its canonical position.
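// Bit i of the VPTERNLOG immediate is the function value for the input
// combination A = i[2], B = i[1], C = i[0], where A, B and C are operands 0,
// 1 and 2 (the digits in the transform names give the new 1-based operand
// order). Permuting the operands therefore permutes the immediate's bits.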
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/4 and 3/6.
uint8_t NewImm = Imm & 0xa5;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
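// For example, 0xCA encodes A ? B : C; after the 0<->2 swap it becomes 0xD8,
// which encodes C ? B : A.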
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
uint8_t Imm = N->getZExtValue();
// Swap bits 2/4 and 3/5.
uint8_t NewImm = Imm & 0xc3;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x20) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
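// For example, 0xCA (A ? B : C) becomes 0xE2 (B ? A : C).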
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/2 and 5/6.
uint8_t NewImm = Imm & 0x99;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by moving operand 0 to the end.
uint8_t Imm = N->getZExtValue();
// Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x08;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
uint8_t Imm = N->getZExtValue();
// Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
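// A minimal C sketch of the generic remapping these transforms specialize
// (hypothetical helper, not part of this file): for a permutation p of the
// three operands, the new immediate's bit (a,b,c) is the old immediate's bit
// indexed by the permuted inputs.
//   uint8_t remap_ternlog_imm(uint8_t imm, int p0, int p1, int p2) {
//     uint8_t out = 0;
//     for (int i = 0; i < 8; ++i) {
//       int in[3] = { (i >> 2) & 1, (i >> 1) & 1, i & 1 };  // A, B, C
//       int j = (in[p0] << 2) | (in[p1] << 1) | in[p2];
//       if (imm & (1 << j))
//         out |= 1 << i;
//     }
//     return out;
//   }
// remap_ternlog_imm(imm, 2, 1, 0) reproduces VPTERNLOG321_imm8, and
// remap_ternlog_imm(imm, 1, 0, 2) reproduces VPTERNLOG213_imm8.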
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
(i8 imm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
// Additional patterns for matching loads in other positions.
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching masked loads with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Additional patterns for matching broadcasts in other positions.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching masked broadcasts with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
(i8 imm:$src4)), _.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
let Predicates = [HasAVX512] in
defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
}
}
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
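// For example (AT&T syntax), "vpternlogd $0xca, %zmm2, %zmm1, %zmm0" computes
// the bitwise select zmm0 = (zmm0 & zmm1) | (~zmm0 & zmm2).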
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT _.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT (bitconvert (_.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>, EVEX_B;
} // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT _.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
}
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>, EVEX_B;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec>{
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
AVX512AIi8Base, EVEX_4V, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>,
AVX512AIi8Base, EVEX_4V, EVEX_V128;
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>,
AVX512AIi8Base, EVEX_4V, EVEX_V256;
}
}
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
f32x_info, v4i32x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
f64x_info, v2i64x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// A[0] += B[0];
// return A;
// }
//
// Previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// __m128 C = A + B;
// return (__m128) {C[0], A[1], A[2], A[3]};
// }
//
// Previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
(Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
FR32X:$src))))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32X:$src, VR128X))>;
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
(Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
FR32X:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32X:$src, VR128X))>;
// vector math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
// vector math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(X86Movss (v4f32 VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
(Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
FR32X:$src2),
FR32X:$src0))),
(!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
VK1WM:$mask, v4f32:$src1,
(COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
}
}
defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
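// For reference, a hand-resolved sketch (illustrative only) of the first
// pattern that "defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;" above
// instantiates; the !cast resolves to the VADDSSZrr_Int record:
//
//   def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
//             (fadd (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
//                   FR32X:$src))))),
//             (VADDSSZrr_Int v4f32:$dst,
//              (COPY_TO_REGCLASS FR32X:$src, VR128X))>;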
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
(Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
FR64X:$src))))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
(Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
FR64X:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// vector math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
// vector math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
// extracted masked scalar math op with insert via movsd
def : Pat<(X86Movsd (v2f64 VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
(Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
FR64X:$src2),
FR64X:$src0))),
(!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
VK1WM:$mask, v2f64:$src1,
(COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
}
}
defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Index: head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td (revision 322855)
@@ -1,2687 +1,275 @@
//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SandyBridgeModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SB can decode 4
// instructions per cycle.
// FIXME: Identify instructions that aren't a single fused micro-op.
let IssueWidth = 4;
let MicroOpBufferSize = 168; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
// Based on the LSD (loop-stream detector) queue size.
let LoopMicroOpBufferSize = 28;
- // This flag is set to allow the scheduler to assign
- // a default model to unrecognized opcodes.
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
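// Illustrative only: a CPU binds to this model through the generic
// ProcessorModel class from Target.td; the real definition lives in X86.td
// and lists the full feature set (a hypothetical subset is shown here):
//
//   def : ProcessorModel<"sandybridge", SandyBridgeModel,
//                        [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT]>;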
let SchedModel = SandyBridgeModel in {
// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
// Ports 0, 1, and 5 handle all computation.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores.
def SBPort23 : ProcResource<2>;
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
def SBPort4 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
-def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>;
def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
// 54-entry unified scheduler.
def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
let BufferSize=54;
}
// Integer division issued on port 0.
def SBDivider : ProcResource<1>;
// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
ProcResourceKind ExePort,
int Lat> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
// Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
let Latency = !add(Lat, 4);
}
}
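// As an illustrative expansion (assuming the WriteFAdd/WriteFAddLd pairing
// from X86Schedule.td), "defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;"
// below produces roughly:
//
//   def : WriteRes<WriteFAdd,   [SBPort1]>           { let Latency = 3; }
//   def : WriteRes<WriteFAddLd, [SBPort23, SBPort1]> { let Latency = 7; }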
// A folded store needs a cycle on port 4 for the store data, but it does not
// need an extra port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort4]>;
def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, SBPort015, 1>;
defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, SBPort05, 1>;
defm : SBWriteResPair<WriteJump, SBPort5, 1>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SBPort15]>;
// This is quite rough; the latency depends on the dividend.
def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
let Latency = 25;
let ResourceCycles = [1, 10];
}
def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
let Latency = 29;
let ResourceCycles = [1, 1, 10];
}
// Scalar and vector floating point.
defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteFDiv, SBPort0, 24>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
-defm : SBWriteResPair<WriteFSqrt, SBPort0, 14>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
defm : SBWriteResPair<WriteFShuffle, SBPort5, 1>;
defm : SBWriteResPair<WriteFBlend, SBPort05, 1>;
def : WriteRes<WriteFVarBlend, [SBPort0, SBPort5]> {
let Latency = 2;
let ResourceCycles = [1, 1];
}
def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
let Latency = 6;
let ResourceCycles = [1, 1, 1];
}
// Vector integer operations.
-defm : SBWriteResPair<WriteVecShift, SBPort5, 1>;
-defm : SBWriteResPair<WriteVecLogic, SBPort5, 1>;
-defm : SBWriteResPair<WriteVecALU, SBPort1, 3>;
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteShuffle, SBPort5, 1>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
defm : SBWriteResPair<WriteBlend, SBPort15, 1>;
def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
let Latency = 2;
let ResourceCycles = [1, 1];
}
def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
let Latency = 6;
let ResourceCycles = [1, 1, 1];
}
-def : WriteRes<WriteMPSAD, [SBPort0,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1, 1];
}
-def : WriteRes<WriteMPSADLd, [SBPort0,SBPort23,SBPort15]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1, 1, 1];
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
// HADD, HSUB PS/PD
// x,x / v,v,v.
def : WriteRes<WriteFHAdd, [SBPort1]> {
let Latency = 3;
}
// x,m / v,v,m.
def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> {
let Latency = 7;
let ResourceCycles = [1, 1];
}
// PHADD|PHSUB (S) W/D.
// v <- v,v.
def : WriteRes<WritePHAdd, [SBPort15]>;
// v <- v,m.
def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> {
let Latency = 5;
let ResourceCycles = [1, 1];
}
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort015]> {
let Latency = 11;
let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort015, SBPort23]> {
let Latency = 11;
let ResourceCycles = [3, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
let Latency = 11;
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
let Latency = 11;
let ResourceCycles = [7, 1];
}
// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [SBPort0]> {
- let Latency = 11;
- let NumMicroOps = 3;
+def : WriteRes<WritePCmpIStrI, [SBPort015]> {
+ let Latency = 3;
let ResourceCycles = [3];
}
-def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
- let Latency = 17;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
+ let Latency = 3;
+ let ResourceCycles = [3, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
let Latency = 4;
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
let Latency = 4;
let ResourceCycles = [7, 1];
}
// AES Instructions.
-def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+def : WriteRes<WriteAESDecEnc, [SBPort015]> {
+ let Latency = 8;
+ let ResourceCycles = [2];
}
-def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 1];
}
-def : WriteRes<WriteAESIMC, [SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 2;
+def : WriteRes<WriteAESIMC, [SBPort015]> {
+ let Latency = 8;
let ResourceCycles = [2];
}
-def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
- let Latency = 18;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 1];
}
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
let Latency = 8;
let ResourceCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
let Latency = 8;
let ResourceCycles = [10, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SBPort015]> {
let Latency = 14;
let ResourceCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
let Latency = 14;
let ResourceCycles = [17, 1];
}
def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
def : WriteRes<WriteNop, []>;
// AVX2 is not supported on this architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>;
-
-// Remaining SNB instrs.
-
-def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>;
-
-def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>;
-def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>;
-def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>;
-def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>;
-
-def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
-def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>;
-
-def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>;
-
-def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
-def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>;
-
-def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>;
-
-def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>;
-
-def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>;
-
-def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>;
-
-def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>;
-
-def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>;
-
-def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>;
-
-def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>;
-
-def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>;
-
-def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>;
-
-def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>;
-
-def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>;
-
-def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
-
-def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>;
-
-def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>;
-
-def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
-
-def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
- let Latency = 3;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>;
-def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>;
-
-def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 3;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
-
-def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>;
-
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
-def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
-def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
-
-def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
-
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
-
-def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>;
-
-def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
-def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
-
-def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>;
-def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
-
-def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
-
-def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
-
-def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>;
-
-def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>;
-def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>;
-
-def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
-
-def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
-def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
-def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
-
-def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
-
-def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>;
-
-def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>;
-
-def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>;
-
-def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
-
-def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
-
-def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>;
-def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>;
-
-def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>;
-
-def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>;
-
-def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
-
-def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
-
-def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>;
-
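// (Aside: SBWriteResGroup48's single-uop SBPort23 loads above are modeled one
// cycle slower than the 5-cycle loads of SBWriteResGroup31.)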
-def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
-def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
-
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>;
-
-def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>;
-
-def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>;
-
-def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>;
-def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>;
-
-def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>;
-
-def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>;
-
-def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>;
-
-def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
-
-def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>;
-
-def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>;
-
-def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>;
-
-def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>;
-def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
-
-def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>;
-def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>;
-
-def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>;
-def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>;
-
-def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>;
-
-def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>;
-
-def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>;
-
-def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>;
-def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>;
-
-def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>;
-def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>;
-
-def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
-
-def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>;
-
-def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>;
-def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>;
-def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>;
-def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>;
-
-def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>;
-
-def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>;
-
-def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>;
-def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>;
-
-def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>;
-def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>;
-
-def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>;
-def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>;
-
-def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>;
-
-def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>;
-def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>;
-
-def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
-
-def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>;
-
-def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>;
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>;
-
-def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>;
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>;
-
-def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>;
-
-def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>;
-
-def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>;
-
-def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
-def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
-def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
-
-def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
-}
-def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>;
-
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>;
-def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>;
-
-def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>;
-
-def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
-
-def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
-
-def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>;
-def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>;
-
-def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
-
-def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>;
-
-def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>;
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>;
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>;
-
-def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>;
-
-def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
-
-def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,2,3];
-}
-def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
-
-def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,2,2,1];
-}
-def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
-
-def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
-
-def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
-
-def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
-
-def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
-
-def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>;
-
-def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>;
-
-def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>;
-
-def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>;
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>;
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>;
-
-def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>;
-def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>;
-
-def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>;
-
-def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>;
-def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>;
-
-def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>;
-def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>;
-def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>;
-def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>;
-
-def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>;
-def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>;
-def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>;
-
-def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>;
-
-def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>;
-
-def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>;
-
-def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> {
- let Latency = 14;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>;
-
-def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 14;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
-
-def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>;
-def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>;
-
-def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 15;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>;
-def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>;
-
-def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
- let Latency = 15;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>;
-def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>;
-
-def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 17;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
-}
-def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>;
-
-def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 18;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>;
-def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>;
-
-def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 20;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>;
-
-def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> {
- let Latency = 21;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>;
-
-def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 21;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>;
-
-def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> {
- let Latency = 22;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>;
-
-def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> {
- let Latency = 24;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>;
-
-def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 28;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>;
-
-def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 29;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
-def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>;
-
-def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 31;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>;
-
-def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 34;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>;
-
-def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 36;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
-def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>;
-
-def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 45;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
-def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>;
-
-def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 52;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
-def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>;
-
-def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> {
- let Latency = 114;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>;
-
} // SchedModel
Index: head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
===================================================================
--- head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp (revision 322854)
+++ head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp (revision 322855)
@@ -1,167 +1,183 @@
//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines an interface to a dlltool.exe-compatible driver.
//
//===----------------------------------------------------------------------===//
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Path.h"
#include <string>
#include <vector>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::COFF;
namespace {
enum {
OPT_INVALID = 0,
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
#include "Options.inc"
#undef OPTION
};
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Options.inc"
#undef PREFIX
static const llvm::opt::OptTable::Info infoTable[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
{X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
#include "Options.inc"
#undef OPTION
};
class DllOptTable : public llvm::opt::OptTable {
public:
DllOptTable() : OptTable(infoTable, false) {}
};
} // namespace
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
Optional<MemoryBufferRef> openFile(StringRef Path) {
ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
if (std::error_code EC = MB.getError()) {
llvm::errs() << "fail openFile: " << EC.message() << "\n";
return None;
}
MemoryBufferRef MBRef = MB.get()->getMemBufferRef();
OwningMBs.push_back(std::move(MB.get())); // take ownership
return MBRef;
}
static MachineTypes getEmulation(StringRef S) {
return StringSwitch<MachineTypes>(S)
.Case("i386", IMAGE_FILE_MACHINE_I386)
.Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64)
.Case("arm", IMAGE_FILE_MACHINE_ARMNT)
.Default(IMAGE_FILE_MACHINE_UNKNOWN);
}
static std::string getImplibPath(std::string Path) {
SmallString<128> Out = StringRef("lib");
Out.append(Path);
sys::path::replace_extension(Out, ".a");
return Out.str();
}
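// Editor's illustration (not in the original source): getImplibPath("foo.dll")
// returns "libfoo.a" -- "lib" is prepended and the extension replaced by ".a".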
int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
DllOptTable Table;
unsigned MissingIndex;
unsigned MissingCount;
llvm::opt::InputArgList Args =
Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount);
if (MissingCount) {
llvm::errs() << Args.getArgString(MissingIndex) << ": missing argument\n";
return 1;
}
// Handle when no input or output is specified
if (Args.hasArgNoClaim(OPT_INPUT) ||
(!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) {
Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false);
llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm\n";
return 1;
}
if (!Args.hasArgNoClaim(OPT_m) && Args.hasArgNoClaim(OPT_d)) {
llvm::errs() << "error: no target machine specified\n"
<< "supported targets: i386, i386:x86-64, arm\n";
return 1;
}
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
if (!Args.hasArg(OPT_d)) {
llvm::errs() << "no definition file specified\n";
return 1;
}
Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue());
if (!MB)
return 1;
if (!MB->getBufferSize()) {
llvm::errs() << "definition file empty\n";
return 1;
}
COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
if (auto *Arg = Args.getLastArg(OPT_m))
Machine = getEmulation(Arg->getValue());
if (Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
llvm::errs() << "unknown target\n";
return 1;
}
Expected<COFFModuleDefinition> Def =
parseCOFFModuleDefinition(*MB, Machine, true);
if (!Def) {
llvm::errs() << "error parsing definition\n"
<< errorToErrorCode(Def.takeError()).message();
return 1;
}
// Do this after the parser because parseCOFFModuleDefinition sets OutputFile.
if (auto *Arg = Args.getLastArg(OPT_D))
Def->OutputFile = Arg->getValue();
if (Def->OutputFile.empty()) {
llvm::errs() << "no output file specified\n";
return 1;
}
std::string Path = Args.getLastArgValue(OPT_l);
if (Path.empty())
Path = getImplibPath(Def->OutputFile);
+ if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) {
+ for (COFFShortExport& E : Def->Exports) {
+ if (E.isWeak() || (!E.Name.empty() && E.Name[0] == '?'))
+ continue;
+ E.SymbolName = E.Name;
+ // Trim off the trailing decoration. Symbols will always have a
+ // starting prefix here (either _ for cdecl/stdcall, @ for fastcall
+ // or ? for C++ functions). (Vectorcall functions also will end up having
+ // a prefix here, even if they shouldn't.)
+ E.Name = E.Name.substr(0, E.Name.find('@', 1));
+ // By making sure E.SymbolName != E.Name for decorated symbols,
+ // writeImportLibrary writes these symbols with the type
+ // IMPORT_NAME_UNDECORATE.
+ }
+ }
+
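+ // Editor's illustration (not part of this commit): an i386 stdcall export
+ // parsed as "_Func@8" keeps SymbolName = "_Func@8" while Name is trimmed
+ // to "_Func", so writeImportLibrary emits it as IMPORT_NAME_UNDECORATE.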
if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;
}
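A hypothetical invocation exercising the new -k path (flag names are defined
in the Options.td diff below; the file names are made up):

llvm-dlltool -m i386 -d foo.def -l libfoo.a -k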
Index: head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td
===================================================================
--- head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td (revision 322854)
+++ head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td (revision 322855)
@@ -1,26 +1,26 @@
include "llvm/Option/OptParser.td"
def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target machine">;
def m_long : JoinedOrSeparate<["--"], "machine">, Alias<m>;
def l: JoinedOrSeparate<["-"], "l">, HelpText<"Generate an import lib">;
def l_long : JoinedOrSeparate<["--"], "output-lib">, Alias<l>;
def D: JoinedOrSeparate<["-"], "D">, HelpText<"Specify the input DLL Name">;
def D_long : JoinedOrSeparate<["--"], "dllname">, Alias<D>;
def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">;
def d_long : JoinedOrSeparate<["--"], "input-def">, Alias<d>;
+def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
+def k_alias: Flag<["--"], "kill-at">, Alias<k>;
+
//==============================================================================
// The flags below do nothing. They are defined only for dlltool compatibility.
//==============================================================================
-
-def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
-def k_alias: Flag<["--"], "kill-at">, Alias<k>;
def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">;
def S_alias: JoinedOrSeparate<["--"], "as">, Alias<S>;
def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">;
def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias<f>;
Index: head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp (revision 322855)
@@ -1,174 +1,173 @@
//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers atomic intrinsics to non-atomic form for use in a known
// non-preemptible environment.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI);
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
Res = Builder.CreateInsertValue(Res, Equal, 1);
CXI->replaceAllUsesWith(Res);
CXI->eraseFromParent();
return true;
}
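// Editor's sketch of the rewrite above (illustrative IR, not from the source):
//   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
// becomes
//   %orig = load i32, i32* %p
//   %eq = icmp eq i32 %orig, %cmp
//   %res = select i1 %eq, i32 %new, i32 %orig
//   store i32 %res, i32* %p
// with the {i32, i1} result rebuilt from %orig and %eq via insertvalue.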
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
case AtomicRMWInst::Xchg:
Res = Val;
break;
case AtomicRMWInst::Add:
Res = Builder.CreateAdd(Orig, Val);
break;
case AtomicRMWInst::Sub:
Res = Builder.CreateSub(Orig, Val);
break;
case AtomicRMWInst::And:
Res = Builder.CreateAnd(Orig, Val);
break;
case AtomicRMWInst::Nand:
Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
break;
case AtomicRMWInst::Or:
Res = Builder.CreateOr(Orig, Val);
break;
case AtomicRMWInst::Xor:
Res = Builder.CreateXor(Orig, Val);
break;
case AtomicRMWInst::Max:
Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
Val, Orig);
break;
case AtomicRMWInst::Min:
Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
Orig, Val);
break;
case AtomicRMWInst::UMax:
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Val, Orig);
break;
case AtomicRMWInst::UMin:
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Orig, Val);
break;
}
Builder.CreateStore(Res, Ptr);
RMWI->replaceAllUsesWith(Orig);
RMWI->eraseFromParent();
return true;
}
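// Editor's illustration (not from the source): "%old = atomicrmw nand i32* %p, i32 %v seq_cst"
// lowers to
//   %old = load i32, i32* %p
//   %and = and i32 %old, %v
//   %res = xor i32 %and, -1        ; CreateNot
//   store i32 %res, i32* %p
// and all uses of the atomicrmw observe the originally loaded value.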
static bool LowerFenceInst(FenceInst *FI) {
FI->eraseFromParent();
return true;
}
static bool LowerLoadInst(LoadInst *LI) {
LI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
static bool LowerStoreInst(StoreInst *SI) {
SI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
static bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE;) {
Instruction *Inst = &*DI++;
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
Changed |= LowerAtomicCmpXchgInst(CXI);
else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
Changed |= LowerAtomicRMWInst(RMWI);
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isAtomic())
LowerLoadInst(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isAtomic())
LowerStoreInst(SI);
}
}
return Changed;
}
static bool lowerAtomics(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F) {
Changed |= runOnBasicBlock(BB);
}
return Changed;
}
PreservedAnalyses LowerAtomicPass::run(Function &F, FunctionAnalysisManager &) {
if (lowerAtomics(F))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
namespace {
class LowerAtomicLegacyPass : public FunctionPass {
public:
static char ID;
LowerAtomicLegacyPass() : FunctionPass(ID) {
initializeLowerAtomicLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ // Don't skip optnone functions; atomics still need to be lowered.
FunctionAnalysisManager DummyFAM;
auto PA = Impl.run(F, DummyFAM);
return !PA.areAllPreserved();
}
private:
LowerAtomicPass Impl;
};
}
char LowerAtomicLegacyPass::ID = 0;
INITIALIZE_PASS(LowerAtomicLegacyPass, "loweratomic",
"Lower atomic intrinsics to non-atomic form", false, false)
Pass *llvm::createLowerAtomicPass() { return new LowerAtomicLegacyPass(); }
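Usage sketch (editor's addition): the pass is registered under the name
"loweratomic", so with the legacy pass manager it can be exercised via,
e.g., "opt -loweratomic in.ll -S".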
Index: head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp (revision 322855)
@@ -1,2281 +1,2287 @@
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
using namespace reassociate;
#define DEBUG_TYPE "reassociate"
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getModule();
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
dbgs() << "[ ";
Ops[i].Op->printAsOperand(dbgs(), false, M);
dbgs() << ", #" << Ops[i].Rank << "] ";
}
}
#endif
/// Utility class representing a non-constant Xor-operand. We classify
/// non-constant Xor-Operands into two categories:
/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
/// C2)
/// C2.1) The operand is in the form "X | C", where C is a non-zero
/// constant.
/// C2.2) Any operand E that doesn't fall into C1 or C2.1 is viewed as
/// "E | 0".
class llvm::reassociate::XorOpnd {
public:
XorOpnd(Value *V);
bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
private:
Value *OrigVal;
Value *SymbolicPart;
APInt ConstPart;
unsigned SymbolicRank;
bool isOr;
};
XorOpnd::XorOpnd(Value *V) {
assert(!isa<ConstantInt>(V) && "No ConstantInt");
OrigVal = V;
Instruction *I = dyn_cast<Instruction>(V);
SymbolicRank = 0;
if (I && (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And)) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
const APInt *C;
if (match(V0, PatternMatch::m_APInt(C)))
std::swap(V0, V1);
if (match(V1, PatternMatch::m_APInt(C))) {
ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
return;
}
}
// view the operand as "V | 0"
SymbolicPart = V;
ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
isOr = true;
}
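// Worked examples for the classification above (editor's addition):
//   XorOpnd(X & 0x0F): SymbolicPart = X, ConstPart = 0x0F, isOr = false (C1)
//   XorOpnd(X | 0x01): SymbolicPart = X, ConstPart = 0x01, isOr = true (C2.1)
//   XorOpnd(X): SymbolicPart = X, ConstPart = 0, isOr = true (C2.2)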
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode &&
(!isa<FPMathOperator>(V) ||
cast<Instruction>(V)->hasUnsafeAlgebra()))
return cast<BinaryOperator>(V);
return nullptr;
}
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
unsigned Opcode2) {
if (V->hasOneUse() && isa<Instruction>(V) &&
(cast<Instruction>(V)->getOpcode() == Opcode1 ||
cast<Instruction>(V)->getOpcode() == Opcode2) &&
(!isa<FPMathOperator>(V) ||
cast<Instruction>(V)->hasUnsafeAlgebra()))
return cast<BinaryOperator>(V);
return nullptr;
}
void ReassociatePass::BuildRankMap(Function &F,
ReversePostOrderTraversal<Function*> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
ValueRankMap[&*I] = ++i;
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
// Traverse basic blocks in ReversePostOrder
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++i << 16;
// Walk the basic block, adding precomputed ranks for any instructions that
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (Instruction &I : *BB)
if (mayBeMemoryDependent(I))
ValueRankMap[&I] = ++BBRank;
}
}
unsigned ReassociatePass::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
if (unsigned Rank = ValueRankMap[I])
return Rank; // Rank already known?
// If this is an expression, return 1+MAX(rank(LHS), rank(RHS)) so that
// we can reassociate expressions for code motion! Since we do not recurse
// for PHI nodes, we cannot have infinite recursion here, because there
// cannot be loops in the value graph that do not go through PHI nodes.
unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
for (unsigned i = 0, e = I->getNumOperands();
i != e && Rank != MaxRank; ++i)
Rank = std::max(Rank, getRank(I->getOperand(i)));
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
!BinaryOperator::isFNeg(I))
++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
return ValueRankMap[I] = Rank;
}
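// Editor's worked example (not in the original source): for
// "%t = add i32 %x, %y" where %x and %y are arguments with ranks 3 and 4
// (BuildRankMap pre-increments from i = 2), getRank(%t) = 1 + max(3, 4) = 5.
// A not/neg skips the final increment, so X and ~X share a rank.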
// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
void ReassociatePass::canonicalizeOperands(Instruction *I) {
assert(isa<BinaryOperator>(I) && "Expected binary operator.");
assert(I->isCommutative() && "Expected commutative operator.");
Value *LHS = I->getOperand(0);
Value *RHS = I->getOperand(1);
unsigned LHSRank = getRank(LHS);
unsigned RHSRank = getRank(RHS);
if (isa<Constant>(RHS))
return;
if (isa<Constant>(LHS) || RHSRank < LHSRank)
cast<BinaryOperator>(I)->swapOperands();
}
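// Editor's note (illustrative): "add i32 7, %x" becomes "add i32 %x, 7";
// for two non-constant operands, the lower-ranked value ends up on the left.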
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType();
Constant *NegOne = Ty->isIntOrIntVectorTy() ?
ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
return Res;
}
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
if (Bitwidth < 3)
return Bitwidth - 1;
return Bitwidth - 2;
}
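// Editor's worked example: for Bitwidth = 32 this returns 30, i.e.
// lambda(2^32) = 2^30, so x^(2^30) == 1 (mod 2^32) for every odd x.
// For Bitwidth = 2 it returns 1: 1^2 == 3^2 == 1 (mod 4).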
/// Add the extra weight 'RHS' to the existing weight 'LHS',
/// reducing the combined weight using any special properties of the operation.
/// The existing weight LHS represents the computation X op X op ... op X where
/// X occurs LHS times. The combined weight represents X op X op ... op X with
/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
// If we were working with infinite precision arithmetic then the combined
// weight would be LHS + RHS. But we are using finite precision arithmetic,
// and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
// for nilpotent operations and addition, but not for idempotent operations
// and multiplication), so it is important to correctly reduce the combined
// weight back into range if wrapping would be wrong.
// If RHS is zero then the weight didn't change.
if (RHS.isMinValue())
return;
// If LHS is zero then the combined weight is RHS.
if (LHS.isMinValue()) {
LHS = RHS;
return;
}
// From this point on we know that neither LHS nor RHS is zero.
if (Instruction::isIdempotent(Opcode)) {
// Idempotent means X op X === X, so any non-zero weight is equivalent to a
// weight of 1. Keeping weights at zero or one also means that wrapping is
// not a problem.
assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
return; // Return a weight of 1.
}
if (Instruction::isNilpotent(Opcode)) {
// Nilpotent means X op X === 0, so reduce weights modulo 2.
assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
LHS = 0; // 1 + 1 === 0 modulo 2.
return;
}
if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
// TODO: Reduce the weight by exploiting nsw/nuw?
LHS += RHS;
return;
}
assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
"Unknown associative operation!");
unsigned Bitwidth = LHS.getBitWidth();
// If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
// can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
// bit number x, since either x is odd in which case x^CM = 1, or x is even in
// which case both x^W and x^(W - CM) are zero. By subtracting off multiples
// of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
// which by a happy accident means that they can always be represented using
// Bitwidth bits.
// TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
// the Carmichael number).
if (Bitwidth > 3) {
/// CM - The value of Carmichael's lambda function.
APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
// Any weight W >= Threshold can be replaced with W - CM.
APInt Threshold = CM + Bitwidth;
assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
// For Bitwidth 4 or more the following sum does not overflow.
LHS += RHS;
while (LHS.uge(Threshold))
LHS -= CM;
} else {
// To avoid problems with overflow, do everything the same as above but using
// a larger type.
unsigned CM = 1U << CarmichaelShift(Bitwidth);
unsigned Threshold = CM + Bitwidth;
assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
"Weights not reduced!");
unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
while (Total >= Threshold)
Total -= CM;
LHS = Total;
}
}
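// Editor's worked example for the multiply case: with Bitwidth = 8, CM = 64
// and Threshold = 72; combining weights 70 and 10 gives 80, reduced to
// 80 - 64 = 16, since x^80 == x^16 (mod 2^8) for every 8-bit x.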
typedef std::pair<Value*, APInt> RepeatedValue;
/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
/// op
/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
/// op
/// ...
/// op
/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
///
/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
/// to something completely different. Thus if the routine returns 'true' then
/// you MUST either replace I with a new expression computed from the Ops array,
/// or use RewriteExprTree to put the values back in.
///
/// A leaf node is either not a binary operation of the same kind as the root
/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
/// opcode), or is the same kind of binary operator but has a use which either
/// does not belong to the expression, or does belong to the expression but is
/// a leaf node. Every leaf node has at least one use that is a non-leaf node
/// of the expression, while for non-leaf nodes (except for the root 'I') every
/// use is a non-leaf node of the expression.
///
/// For example:
///                  expression graph        node names
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B
///                  / \ / \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///               +   *          |    F,  G
///
/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
///
/// The expression is maximal: if some instruction is a binary operator of the
/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
/// then the instruction also belongs to the expression, is not a leaf node of
/// it, and its operands also belong to the expression (but may be leaf nodes).
///
/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
/// order to ensure that every non-root node in the expression has *exactly one*
/// use by a non-leaf node of the expression. This destruction means that the
/// caller MUST either replace 'I' with a new expression or use something like
/// RewriteExprTree to put the values back in if the routine indicates that it
/// made a change by returning 'true'.
///
/// In the above example either the right operand of A or the left operand of B
/// will be replaced by undef. If it is B's operand then this gives:
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B - operand of B replaced with undef
///                  / \   \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///               +   *          |    F,  G
///
/// Note that such undef operands can only be reached by passing through 'I'.
/// For example, if you visit operands recursively starting from a leaf node
/// then you will never see such an undef operand unless you get back to 'I',
/// which requires passing through a phi node.
///
/// Note that this routine may also mutate binary operators of the wrong type
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
static bool LinearizeExprTree(BinaryOperator *I,
SmallVectorImpl<RepeatedValue> &Ops) {
DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
assert(I->isAssociative() && I->isCommutative() &&
"Expected an associative and commutative operation!");
// Visit all operands of the expression, keeping track of their weight (the
// number of paths from the expression root to the operand, or if you like
// the number of times that operand occurs in the linearized expression).
// For example, if I = X + A, where X = A + B, then I, X and B have weight 1
// while A has weight two.
// Worklist of non-leaf nodes (their operands are in the expression too) along
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
bool Changed = false;
// Leaves of the expression are values that either aren't the right kind of
// operation (e.g. a constant, or a multiply in an add tree), or are, but have
// some uses that are not inside the expression. For example, in I = X + X,
// X = A + B, the value X has two uses (by I) that are in the expression. If
// X has any other uses, for example in a return instruction, then we consider
// X to be a leaf, and won't analyze it further. When we first visit a value,
// if it has more than one use then at first we conservatively consider it to
// be a leaf. Later, as the expression is explored, we may discover some more
// uses of the value from inside the expression. If all uses turn out to be
// from within the expression (and the value is a binary operator of the right
// kind) then the value is no longer considered to be a leaf, and its operands
// are explored.
// Leaves - Keeps track of the set of putative leaves as well as the number of
// paths to each leaf seen so far.
typedef DenseMap<Value*, APInt> LeafMap;
LeafMap Leaves; // Leaf -> Total weight so far.
SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
#ifndef NDEBUG
SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
#endif
while (!Worklist.empty()) {
std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
I = P.first; // We examine the operands of this binary operator.
for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
assert(Visited.insert(Op).second && "Not first visit!");
DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
Worklist.push_back(std::make_pair(BO, Weight));
continue;
}
// Appears to be a leaf. Is the operand already in the set of leaves?
LeafMap::iterator It = Leaves.find(Op);
if (It == Leaves.end()) {
// Not in the leaf map. Must be the first time we saw this operand.
assert(Visited.insert(Op).second && "Not first visit!");
if (!Op->hasOneUse()) {
// This value has uses not accounted for by the expression, so it is
// not safe to modify. Mark it as being a leaf.
DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
continue;
}
// No uses outside the expression, try morphing it.
} else {
// Already in the leaf map.
assert(It != Leaves.end() && Visited.count(Op) &&
"In leaf map but not visited!");
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
I->setOperand(OpIdx, UndefValue::get(I->getType()));
Changed = true;
// If the leaf is a binary operation of the right kind and we now see
// that its multiple original uses were in fact all by nodes belonging
// to the expression, then no longer consider it to be a leaf and add
// its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
Worklist.push_back(std::make_pair(BO, It->second));
Leaves.erase(It);
continue;
}
#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
if (!Op->hasOneUse())
continue;
// No uses outside the expression, try morphing it.
Weight = It->second;
Leaves.erase(It); // Since the value may be morphed below.
}
// At this point we have a value which, first of all, is not a binary
// expression of the right kind, and secondly, is only used inside the
// expression. This means that it can safely be modified. See if we
// can usefully morph it into an expression of the right kind.
assert((!isa<Instruction>(Op) ||
cast<Instruction>(Op)->getOpcode() != Opcode
|| (isa<FPMathOperator>(Op) &&
!cast<Instruction>(Op)->hasUnsafeAlgebra())) &&
"Should have been handled above!");
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) ||
(Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) {
DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
BO = LowerNegateToMultiply(BO);
DEBUG(dbgs() << *BO << '\n');
Worklist.push_back(std::make_pair(BO, Weight));
Changed = true;
continue;
}
// Failed to morph into an expression of the right type. This really is
// a leaf.
DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
}
}
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
Value *V = LeafOrder[i];
LeafMap::iterator It = Leaves.find(V);
if (It == Leaves.end())
// Node initially thought to be a leaf wasn't.
continue;
assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
APInt Weight = It->second;
if (Weight.isMinValue())
// Leaf already output or weight reduction eliminated it.
continue;
// Ensure the leaf is only output once.
It->second = 0;
Ops.push_back(std::make_pair(V, Weight));
}
// For nilpotent operations or addition there may be no operands, for example
// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
// in both cases the weight reduces to 0 causing the value to be skipped.
if (Ops.empty()) {
Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
Ops.emplace_back(Identity, APInt(Bitwidth, 1));
}
return Changed;
}
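// Editor's illustration: linearizing I = (A + B) + (A + C), where each inner
// add has a single use, yields Ops = {(A, 2), (B, 1), (C, 1)}; A is reachable
// along two root-to-leaf paths, so its weight is 2.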
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
// new expression can usually be written reusing the existing binary operators
// from the original expression tree, without creating any new instructions,
// though the rewritten expression may have a completely different topology.
// We take care to not change anything if the new expression will be the same
// as the original. If more than trivial changes (like commuting operands)
// were made then we are obliged to clear out any optional subclass data like
// nsw flags.
/// NodesToRewrite - Nodes from the original expression available for writing
/// the new expression into.
SmallVector<BinaryOperator*, 8> NodesToRewrite;
unsigned Opcode = I->getOpcode();
BinaryOperator *Op = I;
/// NotRewritable - The operands being written will be the leaves of the new
/// expression and must not be used as inner nodes (via NodesToRewrite) by
/// mistake. Inner nodes are always reassociable, and usually leaves are not
/// (if they were they would have been incorporated into the expression and so
/// would not be leaves), so most of the time there is no danger of this. But
/// in rare cases a leaf may become reassociable if an optimization kills uses
/// of it, or it may momentarily become reassociable during rewriting (below)
/// due to it being removed as an operand of one of its uses. Ensure that misuse
/// of leaf nodes as inner nodes cannot occur by remembering all of the future
/// leaves and refusing to reuse any of them as inner nodes.
SmallPtrSet<Value*, 8> NotRewritable;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
NotRewritable.insert(Ops[i].Op);
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
// a subexpression.
if (i+2 == Ops.size()) {
Value *NewLHS = Ops[i].Op;
Value *NewRHS = Ops[i+1].Op;
Value *OldLHS = Op->getOperand(0);
Value *OldRHS = Op->getOperand(1);
if (NewLHS == OldLHS && NewRHS == OldRHS)
// Nothing changed, leave it alone.
break;
if (NewLHS == OldRHS && NewRHS == OldLHS) {
// The order of the operands was reversed. Swap them.
DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->swapOperands();
DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
break;
}
// The new operation differs non-trivially from the original. Overwrite
// the old operands with the new ones.
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewLHS != OldLHS) {
BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(0, NewLHS);
}
if (NewRHS != OldRHS) {
BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
}
DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
break;
}
// Not the last operation. The left-hand side will be a sub-expression
// while the right-hand side will be the current element of Ops.
Value *NewRHS = Ops[i].Op;
if (NewRHS != Op->getOperand(1)) {
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewRHS == Op->getOperand(0)) {
// The new right-hand side was already present as the left operand. If
// we are lucky then swapping the operands will sort out both of them.
Op->swapOperands();
} else {
// Overwrite with the new right-hand side.
BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
ExpressionChanged = Op;
}
DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
}
// Now deal with the left-hand side. If this is already an operation node
// from the original expression then just rewrite the rest of the expression
// into it.
BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
if (BO && !NotRewritable.count(BO)) {
Op = BO;
continue;
}
// Otherwise, grab a spare node from the original expression and use that as
// the left-hand side. If there are no nodes left then the optimizers made
// an expression with more nodes than the original! This usually means that
// they did something stupid but it might mean that the problem was just too
// hard (finding the minimal number of multiplications needed to realize a
// multiplication expression is NP-complete). Whatever the reason, smart or
// stupid, create a new node if there are none left.
BinaryOperator *NewOp;
if (NodesToRewrite.empty()) {
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
}
DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->setOperand(0, NewOp);
DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
Op = NewOp;
}
// If the expression changed non-trivially then clear out all subclass data
// starting from the operator specified in ExpressionChanged, and compactify
// the operators to just before the expression root to guarantee that the
// expression tree is dominated by all of Ops.
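// (The loop below walks the single-use user chain from ExpressionChanged up
// to the expression root I, moving each operator to just before I.)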
if (ExpressionChanged)
do {
// Preserve FastMathFlags.
if (isa<FPMathOperator>(I)) {
FastMathFlags Flags = I->getFastMathFlags();
ExpressionChanged->clearSubclassOptionalData();
ExpressionChanged->setFastMathFlags(Flags);
} else
ExpressionChanged->clearSubclassOptionalData();
if (ExpressionChanged == I)
break;
ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
} while (1);
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
RedoInsts.insert(NodesToRewrite[i]);
}
/// Insert instructions before the instruction pointed to by BI
/// that compute the negative version of the specified value. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
/// Also add intermediate instructions to the redo list that are modified while
/// pushing the negates through adds. These will be revisited to see if
/// additional opportunities have been exposed.
static Value *NegateValue(Value *V, Instruction *BI,
SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
}
return ConstantExpr::getNeg(C);
}
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
// this means that we turn this:
// X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
// so that later an expression such as Y = 12+X can get reassociated with the
// -12 to eliminate the constants. We assume that instcombine will clean up the
// mess later if we introduce tons of unnecessary negation instructions.
//
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
}
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
// assured that the neg instructions we just inserted dominate the
// instruction we are about to insert after them.
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
// Add the intermediate negates to the redo list as processing them later
// could expose more reassociating opportunities.
ToRedo.insert(I);
return I;
}
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (User *U : V->users()) {
if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
continue;
// We found one! Now we have to make sure that the definition dominates
// this use. We do this by moving it to the entry block (if it is a
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
BinaryOperator *TheNeg = cast<BinaryOperator>(U);
// Verify that the negate is in this function; V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
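// An invoke terminates its block, so the earliest point at which its result
// is available is the start of its normal destination.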
InsertPt = II->getNormalDest()->begin();
} else {
InsertPt = ++InstInput->getIterator();
}
while (isa<PHINode>(InsertPt)) ++InsertPt;
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
} else {
TheNeg->andIRFlags(BI);
}
ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
ToRedo.insert(NewNeg);
return NewNeg;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
return false;
// Don't break up X - undef.
if (isa<UndefValue>(Sub->getOperand(1)))
return false;
// Don't bother to break this up unless either operand is a reassociable add or
// subtract, or the only user of this instruction is one.
Value *V0 = Sub->getOperand(0);
if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
return true;
Value *V1 = Sub->getOperand(1);
if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
return true;
Value *VB = Sub->user_back();
if (Sub->hasOneUse() &&
(isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
return true;
return false;
}
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
static BinaryOperator *
BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
New->takeName(Sub);
// Everyone now refers to the add instruction.
Sub->replaceAllUsesWith(New);
New->setDebugLoc(Sub->getDebugLoc());
DEBUG(dbgs() << "Negated: " << *New << '\n');
return New;
}
/// If this is a shift of a reassociable multiply or is used by one, change
/// this into a multiply by a constant to assist with further reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
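// For example, a shift amount of 3 yields MulCst == 1 << 3 == 8, so (X << 3)
// is rewritten below as X * 8.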
BinaryOperator *Mul =
BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
// Everyone now refers to the mul instruction.
Shl->replaceAllUsesWith(Mul);
Mul->setDebugLoc(Shl->getDebugLoc());
// We can safely preserve the nuw flag in all cases. It's also safe to turn a
// nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
// handling.
bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
if (NSW && NUW)
Mul->setHasNoSignedWrap(true);
Mul->setHasNoUnsignedWrap(NUW);
return Mul;
}
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
unsigned i, Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
if (Ops[j].Op == X)
return j;
if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
if (Instruction *I2 = dyn_cast<Instruction>(X))
if (I1->isIdenticalTo(I2))
return j;
}
// Scan backwards.
for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
if (Ops[j].Op == X)
return j;
if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
if (Instruction *I2 = dyn_cast<Instruction>(X))
if (I1->isIdenticalTo(I2))
return j;
}
return i;
}
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
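/// For example, the values [a, b, c] are emitted as (a + b) + c.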
static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.back();
Ops.pop_back();
Value *V2 = EmitAddTreeOfValues(I, Ops);
return CreateAdd(V2, V1, "tmp", I, I);
}
/// If V is an expression tree that is a multiplication sequence,
/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
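/// For example, given V == A*B*C and Factor == B, this returns A*C.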
Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO)
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
RepeatedValue E = Tree[i];
Factors.append(E.second.getZExtValue(),
ValueEntry(getRank(E.first), E.first));
}
bool FoundFactor = false;
bool NeedsNegate = false;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
if (Factors[i].Op == Factor) {
FoundFactor = true;
Factors.erase(Factors.begin()+i);
break;
}
// If this is a negative version of this factor, remove it.
if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
if (FC1->getValue() == -FC2->getValue()) {
FoundFactor = NeedsNegate = true;
Factors.erase(Factors.begin()+i);
break;
}
} else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
const APFloat &F1 = FC1->getValueAPF();
APFloat F2(FC2->getValueAPF());
F2.changeSign();
if (F1.compare(F2) == APFloat::cmpEqual) {
FoundFactor = NeedsNegate = true;
Factors.erase(Factors.begin() + i);
break;
}
}
}
}
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
return nullptr;
}
BasicBlock::iterator InsertPt = ++BO->getIterator();
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
if (Factors.size() == 1) {
RedoInsts.insert(BO);
V = Factors[0].Op;
} else {
RewriteExprTree(BO, Factors);
V = BO;
}
if (NeedsNegate)
V = CreateNeg(V, "neg", &*InsertPt, BO);
return V;
}
/// If V is a single-use multiply, recursively add its operands as factors,
/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
SmallVectorImpl<Value*> &Factors) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO) {
Factors.push_back(V);
return;
}
// Otherwise, add the LHS and RHS to the list of factors.
FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
/// This optimizes based on identities. If it can be reduced to a single Value,
/// it is returned, otherwise the Ops list is mutated as necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
// If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// First, check for X and ~X in the operand list.
assert(i < Ops.size());
if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX != i) {
if (Opcode == Instruction::And) // ...&X&~X = 0
return Constant::getNullValue(X->getType());
if (Opcode == Instruction::Or) // ...|X|~X = -1
return Constant::getAllOnesValue(X->getType());
}
}
// Next, check for duplicate pairs of values, which we assume are next to
// each other, due to our sorting criteria.
assert(i < Ops.size());
if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
if (Opcode == Instruction::And || Opcode == Instruction::Or) {
// Drop duplicate values for And and Or.
Ops.erase(Ops.begin()+i);
--i; --e;
++NumAnnihil;
continue;
}
// Drop pairs of values for Xor.
assert(Opcode == Instruction::Xor);
if (e == 2)
return Constant::getNullValue(Ops[0].Op->getType());
// Y ^ X^X -> Y
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
i -= 1; e -= 2;
++NumAnnihil;
}
}
return nullptr;
}
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and returns the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
if (ConstOpnd.isNullValue())
return nullptr;
if (ConstOpnd.isAllOnesValue())
return Opnd;
Instruction *I = BinaryOperator::CreateAnd(
Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
InsertBefore);
I->setDebugLoc(InsertBefore->getDebugLoc());
return I;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
// into "R ^ C", where C would be 0, and R is a symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" are returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
//
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt &ConstOpnd, Value *&Res) {
// Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
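// For example, with c1 == c2 == 6: (x | 6) ^ 6 == x & ~6, and the combined
// constant part c1 ^ c2 becomes 0.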
if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
return false;
if (!Opnd1->getValue()->hasOneUse())
return false;
const APInt &C1 = Opnd1->getConstPart();
if (C1 != ConstOpnd)
return false;
Value *X = Opnd1->getSymbolicPart();
Res = createAndInstr(I, X, ~C1);
// ConstOpnd was C2, now C1 ^ C2.
ConstOpnd ^= C1;
if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
RedoInsts.insert(T);
return true;
}
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
// symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" are returned
// via "Res" and "ConstOpnd", respectively (if the entire expression is
// evaluated to a constant, Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
XorOpnd *Opnd2, APInt &ConstOpnd,
Value *&Res) {
Value *X = Opnd1->getSymbolicPart();
if (X != Opnd2->getSymbolicPart())
return false;
// This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
int DeadInstNum = 1;
if (Opnd1->getValue()->hasOneUse())
DeadInstNum++;
if (Opnd2->getValue()->hasOneUse())
DeadInstNum++;
// Xor-Rule 2:
// (x | c1) ^ (x & c2)
// = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
// = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
// = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
//
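// For example, with 4-bit constants c1 == 12 and c2 == 10: c3 == ~12 ^ 10 == 9,
// so (x | 12) ^ (x & 10) == (x & 9) ^ 12.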
if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
if (Opnd2->isOrExpr())
std::swap(Opnd1, Opnd2);
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3((~C1) ^ C2);
// Do not increase code size!
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
Res = createAndInstr(I, X, C3);
ConstOpnd ^= C1;
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
//
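// For example, (x | 1) ^ (x | 2) == (x & 3) ^ 3, with c3 == 1 ^ 2 == 3.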
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
// Do not increase code size
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
Res = createAndInstr(I, X, C3);
ConstOpnd ^= C3;
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
//
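// For example, (x & 12) ^ (x & 10) == x & 6, since AND distributes over XOR.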
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
Res = createAndInstr(I, X, C3);
}
// Put the original operands in the Redo list; hope they will be deleted
// as dead code.
if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
RedoInsts.insert(T);
if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
RedoInsts.insert(T);
return true;
}
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
return V;
if (Ops.size() == 1)
return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
// Step 1: Convert ValueEntry to XorOpnd
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *V = Ops[i].Op;
const APInt *C;
// TODO: Support non-splat vectors.
if (match(V, PatternMatch::m_APInt(C))) {
ConstOpnd ^= *C;
} else {
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
}
}
// NOTE: From this point on, do *NOT* add or delete elements to or from
// "Opnds". Doing so would invalidate pointers into "Opnds", and hence
// invalidate "OpndPtrs" as well. For the same reason, do not fuse this loop
// with the previous one --- pointers into "Opnds" may be invalidated when new
// elements are added to the vector.
for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
OpndPtrs.push_back(&Opnds[i]);
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
//
// The purpose is twofold:
// 1) Cluster together the operands sharing the same symbolic-value.
// 2) An operand with a smaller symbolic-value rank is permuted earlier, which
// can potentially shorten the critical path and expose more loop-invariants.
// Note that values' ranks are basically defined in RPO order (FIXME).
// So, if Rank(X) < Rank(Y) < Rank(Z), X is defined earlier than Y, which is
// defined earlier than Z. Permuting "x | 1", "y & 2" and "z" in the order
// X-Y-Z is better than any other order.
std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
[](XorOpnd *LHS, XorOpnd *RHS) {
return LHS->getSymbolicRank() < RHS->getSymbolicRank();
});
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
// The combined value
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
if (!ConstOpnd.isNullValue() &&
CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
else {
CurrOpnd->Invalidate();
continue;
}
}
if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
PrevOpnd = CurrOpnd;
continue;
}
// Step 3.2: When previous and current operands share the same symbolic
// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
//
if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
// Remove previous operand
PrevOpnd->Invalidate();
if (CV) {
*CurrOpnd = XorOpnd(CV);
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
PrevOpnd = nullptr;
}
Changed = true;
}
}
// Step 4: Reassemble the Ops
if (Changed) {
Ops.clear();
for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd &O = Opnds[i];
if (O.isInvalid())
continue;
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
if (!ConstOpnd.isNullValue()) {
Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
}
unsigned Sz = Ops.size();
if (Sz == 1)
return Ops.back().Op;
if (Sz == 0) {
assert(ConstOpnd.isNullValue());
return ConstantInt::get(Ty, ConstOpnd);
}
}
return nullptr;
}
/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *ReassociatePass::OptimizeAdd(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and -X pairs. If we find any, we
// can simplify expressions like X+-X == 0 and X+~X == -1. While we're at it,
// scan for any duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *TheOp = Ops[i].Op;
// Check to see if we've seen this operand before. If so, we factor all
// instances of the operand together. Due to our sorting criteria, we know
// that these need to be next to each other in the vector.
if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
// Rescan the list, remove all instances of this operand from the expr.
unsigned NumFound = 0;
do {
Ops.erase(Ops.begin()+i);
++NumFound;
} while (i != Ops.size() && Ops[i].Op == TheOp);
DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
++NumFactor;
// Insert a new multiply.
Type *Ty = TheOp->getType();
Constant *C = Ty->isIntOrIntVectorTy() ?
ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
RedoInsts.insert(Mul);
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
return Mul;
// Otherwise, we had some input that didn't have the dupe, such as
// "A + A + B" -> "A*2 + B". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
--i;
e = Ops.size();
continue;
}
// Check for X and -X or X and ~X in the operand list.
if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) &&
!BinaryOperator::isNot(TheOp))
continue;
Value *X = nullptr;
if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
X = BinaryOperator::getNegArgument(TheOp);
else if (BinaryOperator::isNot(TheOp))
X = BinaryOperator::getNotArgument(TheOp);
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
if (Ops.size() == 2 &&
(BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)))
return Constant::getNullValue(X->getType());
// Remove X and ~X from the operand list.
if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
return Constant::getAllOnesValue(X->getType());
Ops.erase(Ops.begin()+i);
if (i < FoundX)
--FoundX;
else
--i; // Need to back up an extra one.
Ops.erase(Ops.begin()+FoundX);
++NumAnnihil;
--i; // Revisit element.
e -= 2; // Removed two elements.
// If we removed X and ~X, append -1 to the operand list.
if (BinaryOperator::isNot(TheOp)) {
Value *V = Constant::getAllOnesValue(X->getType());
Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
e += 1;
}
}
// Scan the operand list, checking to see if there are any common factors
// between operands. Consider something like A*A+A*B*C+D. We would like to
// reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
// To efficiently find this, we count the number of times a factor occurs
// for any ADD operands that are MULs.
DenseMap<Value*, unsigned> FactorOccurrences;
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp =
isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
FindSingleUseMultiplyFactors(BOp, Factors);
assert(Factors.size() > 1 && "Bad linearize!");
// Add one to FactorOccurrences for each unique factor in this op.
SmallPtrSet<Value*, 8> Duplicates;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
Value *Factor = Factors[i];
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
// If Factor is a negative constant, add the negated value as a factor
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
}
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
if (CF->isNegative()) {
APFloat F(CF->getValueAPF());
F.changeSign();
Factor = ConstantFP::get(CF->getContext(), F);
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
}
}
}
}
// If any factor occurred more than one time, we can pull it out.
if (MaxOcc > 1) {
DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
++NumFactor;
// Create a new instruction that uses the MaxOccVal twice. If we don't do
// this, we could run into situations where removing a factor from an
// expression will drop a use of MaxOccVal, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst =
I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
SmallVector<WeakTrackingVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp =
isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
// The factorized operand may occur several times. Convert them all in
// one fell swoop.
for (unsigned j = Ops.size(); j != i;) {
--j;
if (Ops[j].Op == Ops[i].Op) {
NewMulOps.push_back(V);
Ops.erase(Ops.begin()+j);
}
}
--i;
}
}
// No need for extra uses anymore.
DummyInst->deleteValue();
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
// Now that we have inserted the add tree, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
(void)NumAddedValues;
if (Instruction *VI = dyn_cast<Instruction>(V))
RedoInsts.insert(VI);
// Create the multiply.
Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
// Rerun associate on the multiply in case the inner expression turned into
// a multiply. We want to make sure that we keep things in canonical form.
RedoInsts.insert(V2);
// If every add operand included the factor (e.g. "A*B + A*C"), then the
// entire result expression is just the multiply "A*(B+C)".
if (Ops.empty())
return V2;
// Otherwise, we had some input that didn't have the factor, such as
// "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
/// (x*x) -> [(x, 2)]
/// ((x*x)*x) -> [(x, 3)]
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
SmallVectorImpl<Factor> &Factors) {
// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
// Compute the sum of powers of simplifiable factors.
unsigned FactorPowerSum = 0;
for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
Value *Op = Ops[Idx-1].Op;
// Count the number of occurrences of this value.
unsigned Count = 1;
for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
++Count;
// Track for simplification all factors which occur 2 or more times.
if (Count > 1)
FactorPowerSum += Count;
}
// We can only simplify factors if the sum of the powers of our simplifiable
// factors is 4 or higher. When that is the case, we will *always* have
// a simplification. This is an important invariant to prevent cyclically
// trying to simplify already minimal formations.
if (FactorPowerSum < 4)
return false;
// Now gather the simplifiable factors, removing them from Ops.
FactorPowerSum = 0;
for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
Value *Op = Ops[Idx-1].Op;
// Count the number of occurrences of this value.
unsigned Count = 1;
for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
++Count;
if (Count == 1)
continue;
// Move an even number of occurrences to Factors.
Count &= ~1U;
Idx -= Count;
FactorPowerSum += Count;
Factors.push_back(Factor(Op, Count));
Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
}
// None of the adjustments above should have reduced the sum of factor powers
// below our minimum of '4'.
assert(FactorPowerSum >= 4);
std::stable_sort(Factors.begin(), Factors.end(),
[](const Factor &LHS, const Factor &RHS) {
return LHS.Power > RHS.Power;
});
return true;
}
/// \brief Build a tree of multiplies, computing the product of Ops.
static Value *buildMultiplyTree(IRBuilder<> &Builder,
SmallVectorImpl<Value*> &Ops) {
if (Ops.size() == 1)
return Ops.back();
Value *LHS = Ops.pop_back_val();
do {
if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
} while (!Ops.empty());
return LHS;
}
/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
///
/// Given a vector of values raised to various powers, where no two values are
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
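/// For example, the factors [(x, 3), (y, 2)] are computed as ((x*y)*(x*y))*x,
/// using three multiplies instead of the four needed by the naive product.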
Value *
ReassociatePass::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
SmallVectorImpl<Factor> &Factors) {
assert(Factors[0].Power);
SmallVector<Value *, 4> OuterProduct;
for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
Idx < Size && Factors[Idx].Power > 0; ++Idx) {
if (Factors[Idx].Power != Factors[LastIdx].Power) {
LastIdx = Idx;
continue;
}
// We want to multiply across all the factors with the same power so that
// we can raise them to that power as a single entity. Build a mini tree
// for that.
SmallVector<Value *, 4> InnerProduct;
InnerProduct.push_back(Factors[LastIdx].Base);
do {
InnerProduct.push_back(Factors[Idx].Base);
++Idx;
} while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
// Reset the base value of the first factor to the new expression tree.
// We'll remove all the factors with the same power in a second pass.
Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
if (Instruction *MI = dyn_cast<Instruction>(M))
RedoInsts.insert(MI);
LastIdx = Idx;
}
// Unique factors with equal powers -- we've folded them into the first one's
// base.
Factors.erase(std::unique(Factors.begin(), Factors.end(),
[](const Factor &LHS, const Factor &RHS) {
return LHS.Power == RHS.Power;
}),
Factors.end());
// Iteratively collect the base of each factor with an odd power into the
// outer product, and halve each power in preparation for squaring the
// expression.
for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
if (Factors[Idx].Power & 1)
OuterProduct.push_back(Factors[Idx].Base);
Factors[Idx].Power >>= 1;
}
if (Factors[0].Power) {
Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
OuterProduct.push_back(SquareRoot);
OuterProduct.push_back(SquareRoot);
}
if (OuterProduct.size() == 1)
return OuterProduct.front();
Value *V = buildMultiplyTree(Builder, OuterProduct);
return V;
}
Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
// The reassociate transformation for FP operations is performed only
// if unsafe algebra is permitted by FastMathFlags. Propagate those flags
// to the newly generated operations.
if (auto FPI = dyn_cast<FPMathOperator>(I))
Builder.setFastMathFlags(FPI->getFastMathFlags());
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
if (Ops.empty())
return V;
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
return nullptr;
}
Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
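// For example, if the linearized operand list for an add ends with the
// constants 4 and 7, they are folded here into the single constant 11.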
Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
}
// If there was nothing but constants then we are done.
if (Ops.empty())
return Cst;
// Put the combined constant back at the end of the operand list, except if
// there is no point. For example, an add of 0 gets dropped here, while a
// multiplication by zero turns the whole expression into zero.
if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
return Cst;
Ops.push_back(ValueEntry(0, Cst));
}
if (Ops.size() == 1) return Ops[0].Op;
// Handle destructive annihilation due to identities between elements in the
// argument list here.
unsigned NumOps = Ops.size();
switch (Opcode) {
default: break;
case Instruction::And:
case Instruction::Or:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
break;
case Instruction::Xor:
if (Value *Result = OptimizeXor(I, Ops))
return Result;
break;
case Instruction::Add:
case Instruction::FAdd:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
break;
case Instruction::Mul:
case Instruction::FMul:
if (Value *Result = OptimizeMul(I, Ops))
return Result;
break;
}
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
return nullptr;
}
// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
void ReassociatePass::RecursivelyEraseDeadInsts(
Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
I->eraseFromParent();
for (auto Op : Ops)
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
if (OpInst->use_empty())
Insts.insert(OpInst);
}
/// Zap the given instruction, adding interesting operands to the work list.
void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
I->eraseFromParent();
// Optimize its operands.
SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
Visited.insert(Op).second)
Op = Op->user_back();
RedoInsts.insert(Op);
}
MadeChange = true;
}
// Canonicalize expressions of the following form:
// x + (-Constant * y) -> x - (Constant * y)
// x - (-Constant * y) -> x + (Constant * y)
Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
if (!I->hasOneUse() || I->getType()->isVectorTy())
return nullptr;
// Must be a fmul or fdiv instruction.
unsigned Opcode = I->getOpcode();
if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv)
return nullptr;
auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
// Both operands are constant, let it get constant folded away.
if (C0 && C1)
return nullptr;
ConstantFP *CF = C0 ? C0 : C1;
// Must have one constant operand.
if (!CF)
return nullptr;
// Must be a negative ConstantFP.
if (!CF->isNegative())
return nullptr;
// User must be a binary operator with one or more uses.
Instruction *User = I->user_back();
if (!isa<BinaryOperator>(User) || User->use_empty())
return nullptr;
unsigned UserOpcode = User->getOpcode();
if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub)
return nullptr;
// Subtraction is not commutative. Explicitly, the following transform is
// not valid: (-Constant * y) - x -> x + (Constant * y)
if (!User->isCommutative() && User->getOperand(1) != I)
return nullptr;
+ // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
+ // resulting subtract will be broken up later. This can get us into an
+ // infinite loop during reassociation.
+ if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User))
+ return nullptr;
+
// Change the sign of the constant.
APFloat Val = CF->getValueAPF();
Val.changeSign();
I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val));
// Canonicalize I to RHS to simplify the next bit of logic. E.g.,
// ((-Const*y) + x) -> (x + (-Const*y)).
if (User->getOperand(0) == I && User->isCommutative())
cast<BinaryOperator>(User)->swapOperands();
Value *Op0 = User->getOperand(0);
Value *Op1 = User->getOperand(1);
BinaryOperator *NI;
switch (UserOpcode) {
default:
llvm_unreachable("Unexpected Opcode!");
case Instruction::FAdd:
NI = BinaryOperator::CreateFSub(Op0, Op1);
NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
break;
case Instruction::FSub:
NI = BinaryOperator::CreateFAdd(Op0, Op1);
NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
break;
}
NI->insertBefore(User);
NI->setName(User->getName());
User->replaceAllUsesWith(NI);
NI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
MadeChange = true;
return NI;
}
/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
if (!isa<BinaryOperator>(I))
return;
if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
(I->hasOneUse() &&
(isReassociableOp(I->user_back(), Instruction::Mul) ||
isReassociableOp(I->user_back(), Instruction::Add)))) {
Instruction *NI = ConvertShiftToMul(I);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
// Canonicalize negative constants out of expressions.
if (Instruction *Res = canonicalizeNegConstExpr(I))
I = Res;
// Commute binary operators, to canonicalize the order of their operands.
// This can potentially expose more CSE opportunities, and makes writing other
// transformations simpler.
if (I->isCommutative())
canonicalizeOperands(I);
// Don't optimize floating point instructions that don't have unsafe algebra.
if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
// original order of evaluation for short-circuited comparisons that
// SimplifyCFG has folded to AND/OR expressions. If the expression
// is not further optimized, it is likely to be transformed back to a
// short-circuited form for code gen, and the source order may have been
// optimized for the most likely conditions.
if (I->getType()->isIntegerTy(1))
return;
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
} else if (BinaryOperator::isNeg(I)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
// If the negate was simplified, revisit the users to see if we can
// reassociate further.
for (User *U : NI->users()) {
if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
RedoInsts.insert(Tmp);
}
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
} else if (BinaryOperator::isFNeg(I)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
// If the negate was simplified, revisit the users to see if we can
// reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
for (User *U : NI->users()) {
if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
RedoInsts.insert(Tmp);
}
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
}
}
// If this instruction is an associative binary operator, process it.
if (!I->isAssociative()) return;
BinaryOperator *BO = cast<BinaryOperator>(I);
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited, so redo it later.
if (BO->user_back() != BO &&
BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
return;
if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
return;
ReassociateExpression(BO);
}
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(I, Tree);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
RepeatedValue E = Tree[i];
Ops.append(E.second.getZExtValue(),
ValueEntry(getRank(E.first), E.first));
}
DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
// stable_sort so that values with equal ranks will have their relative
// positions maintained (and so the compiler is deterministic). Note that
// this sorts so that the highest ranking values end up at the beginning of
// the vector.
std::stable_sort(Ops.begin(), Ops.end());
// Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
if (V == I)
// Self-referential expression in unreachable code.
return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
if (Instruction *VI = dyn_cast<Instruction>(V))
VI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
++NumAnnihil;
return;
}
// We want to sink immediates as deeply as possible except in the case where
// this is a multiply tree used only by an add, and the immediate is a -1.
// In this case we reassociate to put the negation on the outside so that we
// can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
if (I->hasOneUse()) {
if (I->getOpcode() == Instruction::Mul &&
cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
} else if (I->getOpcode() == Instruction::FMul &&
cast<Instruction>(I->user_back())->getOpcode() ==
Instruction::FAdd &&
isa<ConstantFP>(Ops.back().Op) &&
cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
}
}
DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
if (Ops.size() == 1) {
if (Ops[0].Op == I)
// Self-referential expression in unreachable code.
return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
OI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
return;
}
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
}
PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
// Get the function's basic blocks in Reverse Post Order. This order is used
// by BuildRankMap to precalculate ranks correctly. It also excludes dead
// basic blocks (the analysis in this pass has been seen to hang when
// analyzing dead basic blocks).
ReversePostOrderTraversal<Function *> RPOT(&F);
// Calculate the rank map for F.
BuildRankMap(F, RPOT);
MadeChange = false;
// Traverse the same blocks that were analyzed by BuildRankMap.
for (BasicBlock *BI : RPOT) {
assert(RankMap.count(&*BI) && "BB should be ranked.");
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
OptimizeInst(&*II);
assert(II->getParent() == &*BI && "Moved to a different block!");
++II;
}
// Make a copy of all the instructions to be redone so we can remove dead
// instructions.
SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
// Iterate over all instructions to be reevaluated and remove trivially dead
// instructions. If any operand of the trivially dead instruction becomes
// dead mark it for deletion as well. Continue this process until all
// trivially dead instructions have been removed.
while (!ToRedo.empty()) {
Instruction *I = ToRedo.pop_back_val();
if (isInstructionTriviallyDead(I)) {
RecursivelyEraseDeadInsts(I, ToRedo);
MadeChange = true;
}
}
// Now that we have removed dead instructions, we can reoptimize the
// remaining instructions.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
if (isInstructionTriviallyDead(I))
EraseInst(I);
else
OptimizeInst(I);
}
}
// We are done with the rank map.
RankMap.clear();
ValueRankMap.clear();
if (MadeChange) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
return PreservedAnalyses::all();
}
namespace {
class ReassociateLegacyPass : public FunctionPass {
ReassociatePass Impl;
public:
static char ID; // Pass identification, replacement for typeid
ReassociateLegacyPass() : FunctionPass(ID) {
initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
FunctionAnalysisManager DummyFAM;
auto PA = Impl.run(F, DummyFAM);
return !PA.areAllPreserved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
char ReassociateLegacyPass::ID = 0;
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
"Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() {
return new ReassociateLegacyPass();
}
Index: head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp (revision 322855)
@@ -1,833 +1,834 @@
//===- CloneFunction.cpp - Clone a function into another function ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the CloneFunctionInto interface, which is used as the
// low-level function cloner. This is used by the CloneFunction and function
// inliner to do the dirty work of copying the body of a function around.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo,
DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
Module *TheModule = F ? F->getParent() : nullptr;
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
if (DIFinder && TheModule) {
if (auto *DDI = dyn_cast<DbgDeclareInst>(II))
DIFinder->processDeclare(*TheModule, DDI);
else if (auto *DVI = dyn_cast<DbgValueInst>(II))
DIFinder->processValue(*TheModule, DVI);
if (auto DbgLoc = II->getDebugLoc())
DIFinder->processLocation(*TheModule, DbgLoc.get());
}
Instruction *NewInst = II->clone();
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[&*II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
else
hasDynamicAllocas = true;
}
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->getEntryBlock();
}
return NewBB;
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
for (const Argument &I : OldFunc->args())
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
// Copy all attributes other than those stored in the AttributeList. We need
// to remap the parameter indices of the AttributeList.
AttributeList NewAttrs = NewFunc->getAttributes();
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
// Fix up the personality function that got copied over.
if (OldFunc->hasPersonalityFn())
NewFunc->setPersonalityFn(
MapValue(OldFunc->getPersonalityFn(), VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
AttributeList OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
NewArgAttrs[NewArg->getArgNo()] =
OldAttrs.getParamAttributes(OldArg.getArgNo());
}
}
NewFunc->setAttributes(
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
bool MustCloneSP =
OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
DISubprogram *SP = OldFunc->getSubprogram();
if (SP) {
assert(!MustCloneSP || ModuleLevelChanges);
// Add mappings for some DebugInfo nodes that we don't want duplicated
// even if they're distinct.
auto &MD = VMap.MD();
MD[SP->getUnit()].reset(SP->getUnit());
MD[SP->getType()].reset(SP->getType());
MD[SP->getFile()].reset(SP->getFile());
// If we're not cloning into the same module, no need to clone the
// subprogram.
if (!MustCloneSP)
MD[SP].reset(SP);
}
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
NewFunc->addMetadata(
MD.first,
*MapMetadata(MD.second, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
}
// When we remap instructions, we want to avoid duplicating inlined
// DISubprograms, so record all subprograms we find as we duplicate
// instructions and then freeze them in the MD map.
// We also record information about dbg.value and dbg.declare to avoid
// duplicating the types.
DebugInfoFinder DIFinder;
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
//
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
SP ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
// want to map block addresses from the old function to block addresses in
// the clone. (This is different from the generic ValueMapper
// implementation, which generates an invalid blockaddress when
// cloning a function.)
if (BB.hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(&BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
}
// Note return instructions for the caller.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
for (DISubprogram *ISP : DIFinder.subprograms()) {
if (ISP != SP) {
VMap.MD()[ISP].reset(ISP);
}
}
for (auto *Type : DIFinder.types()) {
VMap.MD()[Type].reset(Type);
}
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
BE = NewFunc->end();
BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (Instruction &II : *BB)
RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
/// Return a copy of the specified function and add it to that function's
/// module. Also, any references specified in the VMap are changed to refer to
/// their mapped value instead of the original one. If any of the arguments to
/// the function are in the VMap, the arguments are deleted from the resultant
/// function. The VMap is updated to include mappings from all of the
/// instructions and basic blocks in the function from their old to new values.
///
Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
std::vector<Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we should not add those arguments to the argument type
// vector.
//
for (const Argument &I : F->args())
if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
ArgTypes.push_back(I.getType());
// Create a new function type...
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
Function *NewF =
Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
for (const Argument & I : F->args())
if (VMap.count(&I) == 0) { // Is this argument preserved?
DestI->setName(I.getName()); // Copy the name over...
VMap[&I] = &*DestI++; // Add mapping to VMap
}
SmallVector<ReturnInst*, 8> Returns; // Ignore the cloned return instructions.
CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
CodeInfo);
return NewF;
}
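// Illustrative sketch, not part of the upstream revision: a minimal caller of
// the CloneFunction defined above. Pre-seeding VMap with a mapping for an
// argument makes the clone drop that argument and rewrite its uses, as the
// doc comment describes. specializeFirstArg and its parameters are
// hypothetical names for this example only.
static Function *specializeFirstArg(Function *F, Constant *C) {
  ValueToValueMapTy VMap;
  VMap[&*F->arg_begin()] = C; // argument is "in the VMap", so it is dropped
  return CloneFunction(F, VMap, /*CodeInfo=*/nullptr);
}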
namespace {
/// This is a private class used to implement CloneAndPruneFunctionInto.
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
ValueToValueMapTy &VMap;
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
CodeInfo(codeInfo) {}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
WeakTrackingVH &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
// Nope, clone it now.
BasicBlock *NewBB;
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
// want to map block addresses from the old function to block addresses in
// the clone. (This is different from the generic ValueMapper
// implementation, which generates an invalid blockaddress when
// cloning a function.)
//
// Note that we don't need to fix the mapping for unreachable blocks;
// the default mapping there is safe.
if (BB->hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
Instruction *NewInst = II->clone();
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
if (Value *V =
SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
- if (Value *MappedV = VMap.lookup(V))
- V = MappedV;
+ if (NewFunc != OldFunc)
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;
NewInst->deleteValue();
continue;
}
}
}
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (CodeInfo)
if (auto CS = ImmutableCallSite(&*II))
if (CS.hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
else
hasDynamicAllocas = true;
}
}
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
if (!Cond) {
Value *V = VMap.lookup(BI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
// Constant fold to uncond branch!
if (Cond) {
BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
}
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If we are switching on a value that is a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
if (!Cond) { // Or a known constant in the caller after mapping...
Value *V = VMap.lookup(SI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
}
if (!TerminatorDone) {
Instruction *NewInst = OldTI->clone();
if (OldTI->hasName())
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo)
if (auto CS = ImmutableCallSite(OldTI))
if (CS.hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
for (const BasicBlock *Succ : TI->successors())
ToClone.push_back(Succ);
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
}
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
ValueMaterializer *Materializer = nullptr;
#ifndef NDEBUG
// If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
if (!StartingInst)
for (const Argument &II : OldFunc->args())
assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
NameSuffix, CodeInfo);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
else {
StartingBB = &OldFunc->getEntryBlock();
StartingInst = &StartingBB->front();
}
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
// Loop over all of the basic blocks in the old function. If the block was
// reachable, we have cloned it and the old block is now in the value map:
// insert it into the new function in the right order. If not, ignore it.
//
// Defer PHI resolution until rest of function is resolved.
SmallVector<const PHINode*, 16> PHIToResolve;
for (const BasicBlock &BI : *OldFunc) {
Value *V = VMap.lookup(&BI);
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (!NewBB) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
// PHI nodes may have been remapped to non-PHI nodes by the caller or
// during the cloning process.
if (const PHINode *PN = dyn_cast<PHINode>(I)) {
if (isa<PHINode>(VMap[PN]))
PHIToResolve.push_back(PN);
else
break;
} else {
break;
}
}
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
RemapInstruction(NewBB->getTerminator(), VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
// Defer PHI resolution until the rest of the function is resolved, since PHI
// resolution requires the CFG to be up-to-date.
for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
const PHINode *OPN = PHIToResolve[phino];
unsigned NumPreds = OPN->getNumIncomingValues();
const BasicBlock *OldBB = OPN->getParent();
BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
// Map operands for blocks that are live and remove operands for blocks
// that are dead.
for (; phino != PHIToResolve.size() &&
PHIToResolve[phino]->getParent() == OldBB; ++phino) {
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
} else {
PN->removeIncomingValue(pred, false);
--pred; // Revisit the next entry.
--e;
}
}
}
// The loop above has removed PHI entries for those blocks that are dead
// and has updated others. However, if a block is live (i.e. copied over)
// but its terminator has been changed to not go to this block, then our
// phi nodes will have invalid entries. Update the PHI nodes in this
// case.
PHINode *PN = cast<PHINode>(NewBB->begin());
NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
if (NumPreds != PN->getNumIncomingValues()) {
assert(NumPreds < PN->getNumIncomingValues());
// Count how many times each predecessor comes to this block.
std::map<BasicBlock*, unsigned> PredCount;
for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
PI != E; ++PI)
--PredCount[*PI];
// Figure out how many entries to remove from each PHI.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
++PredCount[PN->getIncomingBlock(i)];
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
BasicBlock::iterator I = NewBB->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
for (const auto &PCI : PredCount) {
BasicBlock *Pred = PCI.first;
for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
PN->removeIncomingValue(Pred, false);
}
}
}
// If the loops above have made these phi nodes have 0 or 1 operand,
// replace them with undef or the input value. We must do this for
// correctness, because 0-operand phis are not valid.
PN = cast<PHINode>(NewBB->begin());
if (PN->getNumIncomingValues() == 0) {
BasicBlock::iterator I = NewBB->begin();
BasicBlock::const_iterator OldI = OldBB->begin();
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
assert(VMap[&*OldI] == PN && "VMap mismatch");
VMap[&*OldI] = NV;
PN->eraseFromParent();
++OldI;
}
}
}
// Make a second pass over the PHINodes now that all of them have been
// remapped into the new function, simplifying the PHINode and performing any
// recursive simplifications exposed. This will transparently update the
// WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
SmallSetVector<const Value *, 8> Worklist;
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
Worklist.insert(PHIToResolve[Idx]);
// Note that we must test the size on each iteration, the worklist can grow.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
const Value *OrigV = Worklist[Idx];
auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
if (!I)
continue;
// Skip over non-intrinsic callsites, we don't want to remove any nodes from
// the CGSCC.
CallSite CS = CallSite(I);
if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
continue;
// See if this instruction simplifies.
Value *SimpleV = SimplifyInstruction(I, DL);
if (!SimpleV)
continue;
// Stash away all the uses of the old instruction so we can check them for
// recursive simplifications after a RAUW. This is cheaper than checking all
// uses of To on the recursive step in most cases.
for (const User *U : OrigV->users())
Worklist.insert(cast<Instruction>(U));
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
// If the original instruction had no side effects, remove it.
if (isInstructionTriviallyDead(I))
I->eraseFromParent();
else
VMap[OrigV] = I;
}
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
I->getSinglePredecessor() == &*I)) {
BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
}
// We need to simplify conditional branches and switches with a constant
// operand. We try to prune these out when cloning, but if the
// simplification required looking through PHI nodes, those are only
// available after forming the full basic block. That may leave some here,
// and we still want to prune the dead code as early as possible.
ConstantFoldTerminator(&*I);
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor()) {
++I; continue;
}
// We shouldn't be able to get single-entry PHI nodes here, as instsimplify
// above should have zapped all of them.
assert(!isa<PHINode>(Dest->begin()));
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
// Remove the dest block.
Dest->eraseFromParent();
// Do not increment I, iteratively merge all things this block branches to.
}
// Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
Returns.push_back(RI);
}
/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
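// Illustrative sketch, not from upstream: the typical calling pattern for
// CloneAndPruneFunctionInto during inlining-style specialization. Every
// source argument must already be mapped before the call (see the NDEBUG
// check in CloneAndPruneIntoFromInst above); mapping arguments to constants
// is what lets the cloner fold branches away. cloneSpecialized is a
// hypothetical helper.
static void cloneSpecialized(Function *NewF, const Function *OldF,
                             ArrayRef<Value *> ArgValues) {
  ValueToValueMapTy VMap;
  unsigned Idx = 0;
  for (const Argument &A : OldF->args())
    VMap[&A] = ArgValues[Idx++];
  SmallVector<ReturnInst *, 8> Returns;
  CloneAndPruneFunctionInto(NewF, OldF, VMap, /*ModuleLevelChanges=*/false,
                            Returns, /*NameSuffix=*/"", /*CodeInfo=*/nullptr,
                            /*TheCall=*/nullptr);
}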
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
void llvm::remapInstructionsInBlocks(
const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
for (auto *BB : Blocks)
for (auto &Inst : *BB)
RemapInstruction(&Inst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
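// Illustrative sketch, not from upstream: the usual pairing for the helper
// above. CloneBasicBlock fills VMap but leaves the clones' operands pointing
// at the original code; one remapInstructionsInBlocks pass then rewrites the
// clones to refer to themselves. cloneBlockSet is a hypothetical helper.
static void cloneBlockSet(ArrayRef<BasicBlock *> Orig, Function *F,
                          ValueToValueMapTy &VMap,
                          SmallVectorImpl<BasicBlock *> &Clones) {
  for (BasicBlock *BB : Orig) {
    BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone", F);
    VMap[BB] = NewBB;
    Clones.push_back(NewBB);
  }
  remapInstructionsInBlocks(Clones, VMap);
}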
/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
///
/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
Loop *OrigLoop, ValueToValueMapTy &VMap,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
assert(OrigLoop->getSubLoops().empty() &&
"Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
Loop *NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
else
LI->addTopLevelLoop(NewLoop);
BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
assert(OrigPH && "No preheader");
BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
// To rename the loop PHIs.
VMap[OrigPH] = NewPH;
Blocks.push_back(NewPH);
// Update LoopInfo.
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(NewPH, *LI);
// Update DominatorTree.
DT->addNewBlock(NewPH, LoopDomBB);
for (BasicBlock *BB : OrigLoop->getBlocks()) {
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
VMap[BB] = NewBB;
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
// Add DominatorTree node. After seeing all blocks, update to correct IDom.
DT->addNewBlock(NewBB, NewPH);
Blocks.push_back(NewBB);
}
for (BasicBlock *BB : OrigLoop->getBlocks()) {
// Update DominatorTree.
BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
cast<BasicBlock>(VMap[IDomBB]));
}
// Move them physically from the end of the block list.
F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
NewPH);
F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
NewLoop->getHeader()->getIterator(), F->end());
return NewLoop;
}
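// Illustrative sketch, not from upstream: cloneLoopWithPreheader leaves the
// cloned instructions still referring to the original loop's values, so
// callers normally follow it with remapInstructionsInBlocks over the
// returned blocks. cloneAndRemapLoop is a hypothetical helper.
static Loop *cloneAndRemapLoop(BasicBlock *Before, BasicBlock *DomBB, Loop *L,
                               LoopInfo *LI, DominatorTree *DT) {
  ValueToValueMapTy VMap;
  SmallVector<BasicBlock *, 8> Blocks;
  Loop *NewLoop =
      cloneLoopWithPreheader(Before, DomBB, L, VMap, ".copy", LI, DT, Blocks);
  remapInstructionsInBlocks(Blocks, VMap); // fix clone-internal references
  return NewLoop;
}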
/// \brief Duplicate non-Phi instructions from the beginning of block up to
/// StopAt instruction into a split block between BB and its predecessor.
BasicBlock *
llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
ValueToValueMapTy &ValueMapping) {
// We are going to have to map operands from the original block BB to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; StopAt != &*BI; ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
auto I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
return NewBB;
}
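// Illustrative sketch, not from upstream: a jump-threading-style use of the
// helper above, duplicating the instructions of BB that feed a condition
// into a block split off the PredBB->BB edge. hoistUpTo is a hypothetical
// helper.
static BasicBlock *hoistUpTo(BasicBlock *BB, BasicBlock *PredBB,
                             Instruction *StopAt) {
  // Afterwards, Mapping translates each duplicated instruction (and each PHI
  // of BB) to the value it has when control enters from PredBB.
  ValueToValueMapTy Mapping;
  return DuplicateInstructionsInSplitBetween(BB, PredBB, StopAt, Mapping);
}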
Index: head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h (revision 322854)
+++ head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h (revision 322855)
@@ -1,3704 +1,3774 @@
//===-- DeclCXX.h - Classes for representing C++ declarations -*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines the C++ Decl subclasses, other than those for templates
/// (found in DeclTemplate.h) and friends (in DeclFriend.h).
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_DECLCXX_H
#define LLVM_CLANG_AST_DECLCXX_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/LambdaCapture.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/Compiler.h"
namespace clang {
class ClassTemplateDecl;
class ClassTemplateSpecializationDecl;
class ConstructorUsingShadowDecl;
class CXXBasePath;
class CXXBasePaths;
class CXXConstructorDecl;
class CXXConversionDecl;
class CXXDestructorDecl;
class CXXMethodDecl;
class CXXRecordDecl;
class CXXMemberLookupCriteria;
class CXXFinalOverriderMap;
class CXXIndirectPrimaryBaseSet;
class FriendDecl;
class LambdaExpr;
class UsingDecl;
/// \brief Represents any kind of function declaration, whether it is a
/// concrete function or a function template.
class AnyFunctionDecl {
NamedDecl *Function;
AnyFunctionDecl(NamedDecl *ND) : Function(ND) { }
public:
AnyFunctionDecl(FunctionDecl *FD) : Function(FD) { }
AnyFunctionDecl(FunctionTemplateDecl *FTD);
/// \brief Implicitly converts any function or function template into a
/// named declaration.
operator NamedDecl *() const { return Function; }
/// \brief Retrieve the underlying function or function template.
NamedDecl *get() const { return Function; }
static AnyFunctionDecl getFromNamedDecl(NamedDecl *ND) {
return AnyFunctionDecl(ND);
}
};
} // end namespace clang
namespace llvm {
// Provide PointerLikeTypeTraits for non-cvr pointers.
template<>
class PointerLikeTypeTraits< ::clang::AnyFunctionDecl> {
public:
static inline void *getAsVoidPointer(::clang::AnyFunctionDecl F) {
return F.get();
}
static inline ::clang::AnyFunctionDecl getFromVoidPointer(void *P) {
return ::clang::AnyFunctionDecl::getFromNamedDecl(
static_cast< ::clang::NamedDecl*>(P));
}
enum { NumLowBitsAvailable = 2 };
};
} // end namespace llvm
namespace clang {
/// \brief Represents an access specifier followed by colon ':'.
///
/// An object of this class represents sugar for the syntactic occurrence
/// of an access specifier followed by a colon in the list of member
/// specifiers of a C++ class definition.
///
/// Note that they do not represent other uses of access specifiers,
/// such as those occurring in a list of base specifiers.
/// Also note that this class has nothing to do with so-called
/// "access declarations" (C++98 11.3 [class.access.dcl]).
class AccessSpecDecl : public Decl {
virtual void anchor();
/// \brief The location of the ':'.
SourceLocation ColonLoc;
AccessSpecDecl(AccessSpecifier AS, DeclContext *DC,
SourceLocation ASLoc, SourceLocation ColonLoc)
: Decl(AccessSpec, DC, ASLoc), ColonLoc(ColonLoc) {
setAccess(AS);
}
AccessSpecDecl(EmptyShell Empty)
: Decl(AccessSpec, Empty) { }
public:
/// \brief The location of the access specifier.
SourceLocation getAccessSpecifierLoc() const { return getLocation(); }
/// \brief Sets the location of the access specifier.
void setAccessSpecifierLoc(SourceLocation ASLoc) { setLocation(ASLoc); }
/// \brief The location of the colon following the access specifier.
SourceLocation getColonLoc() const { return ColonLoc; }
/// \brief Sets the location of the colon.
void setColonLoc(SourceLocation CLoc) { ColonLoc = CLoc; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getAccessSpecifierLoc(), getColonLoc());
}
static AccessSpecDecl *Create(ASTContext &C, AccessSpecifier AS,
DeclContext *DC, SourceLocation ASLoc,
SourceLocation ColonLoc) {
return new (C, DC) AccessSpecDecl(AS, DC, ASLoc, ColonLoc);
}
static AccessSpecDecl *CreateDeserialized(ASTContext &C, unsigned ID);
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == AccessSpec; }
};
/// \brief Represents a base class of a C++ class.
///
/// Each CXXBaseSpecifier represents a single, direct base class (or
/// struct) of a C++ class (or struct). It specifies the type of that
/// base class, whether it is a virtual or non-virtual base, and what
/// level of access (public, protected, private) is used for the
/// derivation. For example:
///
/// \code
/// class A { };
/// class B { };
/// class C : public virtual A, protected B { };
/// \endcode
///
/// In this code, C will have two CXXBaseSpecifiers, one for "public
/// virtual A" and the other for "protected B".
class CXXBaseSpecifier {
/// \brief The source code range that covers the full base
/// specifier, including the "virtual" (if present) and access
/// specifier (if present).
SourceRange Range;
/// \brief The source location of the ellipsis, if this is a pack
/// expansion.
SourceLocation EllipsisLoc;
/// \brief Whether this is a virtual base class or not.
unsigned Virtual : 1;
/// \brief Whether this is the base of a class (true) or of a struct (false).
///
/// This determines the mapping from the access specifier as written in the
/// source code to the access specifier used for semantic analysis.
unsigned BaseOfClass : 1;
/// \brief Access specifier as written in the source code (may be AS_none).
///
/// The actual type of data stored here is an AccessSpecifier, but we use
/// "unsigned" here to work around a VC++ bug.
unsigned Access : 2;
/// \brief Whether the class contains a using declaration
/// to inherit the named class's constructors.
unsigned InheritConstructors : 1;
/// \brief The type of the base class.
///
/// This will be a class or struct (or a typedef of such). The source code
/// range does not include the \c virtual or the access specifier.
TypeSourceInfo *BaseTypeInfo;
public:
CXXBaseSpecifier() { }
CXXBaseSpecifier(SourceRange R, bool V, bool BC, AccessSpecifier A,
TypeSourceInfo *TInfo, SourceLocation EllipsisLoc)
: Range(R), EllipsisLoc(EllipsisLoc), Virtual(V), BaseOfClass(BC),
Access(A), InheritConstructors(false), BaseTypeInfo(TInfo) { }
/// \brief Retrieves the source range that contains the entire base specifier.
SourceRange getSourceRange() const LLVM_READONLY { return Range; }
SourceLocation getLocStart() const LLVM_READONLY { return Range.getBegin(); }
SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
/// \brief Get the location at which the base class type was written.
SourceLocation getBaseTypeLoc() const LLVM_READONLY {
return BaseTypeInfo->getTypeLoc().getLocStart();
}
/// \brief Determines whether the base class is a virtual base class (or not).
bool isVirtual() const { return Virtual; }
/// \brief Determine whether this base class is a base of a class declared
/// with the 'class' keyword (vs. one declared with the 'struct' keyword).
bool isBaseOfClass() const { return BaseOfClass; }
/// \brief Determine whether this base specifier is a pack expansion.
bool isPackExpansion() const { return EllipsisLoc.isValid(); }
/// \brief Determine whether this base class's constructors get inherited.
bool getInheritConstructors() const { return InheritConstructors; }
/// \brief Set that this base class's constructors should be inherited.
void setInheritConstructors(bool Inherit = true) {
InheritConstructors = Inherit;
}
/// \brief For a pack expansion, determine the location of the ellipsis.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
/// \brief Returns the access specifier for this base specifier.
///
/// This is the actual base specifier as used for semantic analysis, so
/// the result can never be AS_none. To retrieve the access specifier as
/// written in the source code, use getAccessSpecifierAsWritten().
AccessSpecifier getAccessSpecifier() const {
if ((AccessSpecifier)Access == AS_none)
return BaseOfClass? AS_private : AS_public;
else
return (AccessSpecifier)Access;
}
/// \brief Retrieves the access specifier as written in the source code
/// (which may mean that no access specifier was explicitly written).
///
/// Use getAccessSpecifier() to retrieve the access specifier for use in
/// semantic analysis.
AccessSpecifier getAccessSpecifierAsWritten() const {
return (AccessSpecifier)Access;
}
/// \brief Retrieves the type of the base class.
///
/// This type will always be an unqualified class type.
QualType getType() const {
return BaseTypeInfo->getType().getUnqualifiedType();
}
/// \brief Retrieves the type and source location of the base class.
TypeSourceInfo *getTypeSourceInfo() const { return BaseTypeInfo; }
};
/// \brief Represents a C++ struct/union/class.
class CXXRecordDecl : public RecordDecl {
friend void TagDecl::startDefinition();
/// Values used in DefinitionData fields to represent special members.
enum SpecialMemberFlags {
SMF_DefaultConstructor = 0x1,
SMF_CopyConstructor = 0x2,
SMF_MoveConstructor = 0x4,
SMF_CopyAssignment = 0x8,
SMF_MoveAssignment = 0x10,
SMF_Destructor = 0x20,
SMF_All = 0x3f
};
struct DefinitionData {
DefinitionData(CXXRecordDecl *D);
/// \brief True if this class has any user-declared constructors.
unsigned UserDeclaredConstructor : 1;
/// \brief The user-declared special members which this class has.
unsigned UserDeclaredSpecialMembers : 6;
/// \brief True when this class is an aggregate.
unsigned Aggregate : 1;
/// \brief True when this class is a POD-type.
unsigned PlainOldData : 1;
/// true when this class is empty for traits purposes,
/// i.e. has no data members other than 0-width bit-fields, has no
/// virtual function/base, and doesn't inherit from a non-empty
/// class. Doesn't take union-ness into account.
unsigned Empty : 1;
/// \brief True when this class is polymorphic, i.e., has at
/// least one virtual member or derives from a polymorphic class.
unsigned Polymorphic : 1;
/// \brief True when this class is abstract, i.e., has at least
/// one pure virtual function, (that can come from a base class).
unsigned Abstract : 1;
/// \brief True when this class has standard layout.
///
/// C++11 [class]p7. A standard-layout class is a class that:
/// * has no non-static data members of type non-standard-layout class (or
/// array of such types) or reference,
/// * has no virtual functions (10.3) and no virtual base classes (10.1),
/// * has the same access control (Clause 11) for all non-static data
/// members
/// * has no non-standard-layout base classes,
/// * either has no non-static data members in the most derived class and at
/// most one base class with non-static data members, or has no base
/// classes with non-static data members, and
/// * has no base classes of the same type as the first non-static data
/// member.
unsigned IsStandardLayout : 1;
/// \brief True when there are no non-empty base classes.
///
/// This is a helper bit of state used to implement IsStandardLayout more
/// efficiently.
unsigned HasNoNonEmptyBases : 1;
/// \brief True when there are private non-static data members.
unsigned HasPrivateFields : 1;
/// \brief True when there are protected non-static data members.
unsigned HasProtectedFields : 1;
/// \brief True when there are public non-static data members.
unsigned HasPublicFields : 1;
/// \brief True if this class (or any subobject) has mutable fields.
unsigned HasMutableFields : 1;
/// \brief True if this class (or any nested anonymous struct or union)
/// has variant members.
unsigned HasVariantMembers : 1;
/// \brief True if there are no non-field members declared by the user.
unsigned HasOnlyCMembers : 1;
/// \brief True if any field has an in-class initializer, including those
/// within anonymous unions or structs.
unsigned HasInClassInitializer : 1;
/// \brief True if any field is of reference type, and does not have an
/// in-class initializer.
///
/// In this case, value-initialization of this class is illegal in C++98
/// even if the class has a trivial default constructor.
unsigned HasUninitializedReferenceMember : 1;
/// \brief True if any non-mutable field whose type doesn't have a user-
/// provided default ctor also doesn't have an in-class initializer.
unsigned HasUninitializedFields : 1;
/// \brief True if there are any member using-declarations that inherit
/// constructors from a base class.
unsigned HasInheritedConstructor : 1;
/// \brief True if there are any member using-declarations named
/// 'operator='.
unsigned HasInheritedAssignment : 1;
/// \brief These flags are \c true if a defaulted corresponding special
/// member can't be fully analyzed without performing overload resolution.
/// @{
+ unsigned NeedOverloadResolutionForCopyConstructor : 1;
unsigned NeedOverloadResolutionForMoveConstructor : 1;
unsigned NeedOverloadResolutionForMoveAssignment : 1;
unsigned NeedOverloadResolutionForDestructor : 1;
/// @}
/// \brief These flags are \c true if an implicit defaulted corresponding
/// special member would be defined as deleted.
/// @{
+ unsigned DefaultedCopyConstructorIsDeleted : 1;
unsigned DefaultedMoveConstructorIsDeleted : 1;
unsigned DefaultedMoveAssignmentIsDeleted : 1;
unsigned DefaultedDestructorIsDeleted : 1;
/// @}
/// \brief The trivial special members which this class has, per
/// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25,
/// C++11 [class.dtor]p5, or would have if the member were not suppressed.
///
/// This excludes any user-declared but not user-provided special members
/// which have been declared but not yet defined.
unsigned HasTrivialSpecialMembers : 6;
/// \brief The declared special members of this class which are known to be
/// non-trivial.
///
/// This excludes any user-declared but not user-provided special members
/// which have been declared but not yet defined, and any implicit special
/// members which have not yet been declared.
unsigned DeclaredNonTrivialSpecialMembers : 6;
/// \brief True when this class has a destructor with no semantic effect.
unsigned HasIrrelevantDestructor : 1;
/// \brief True when this class has at least one user-declared constexpr
/// constructor which is neither the copy nor move constructor.
unsigned HasConstexprNonCopyMoveConstructor : 1;
/// \brief True if this class has a (possibly implicit) defaulted default
/// constructor.
unsigned HasDefaultedDefaultConstructor : 1;
+ /// \brief True if this class can be passed in a non-address-preserving
+ /// fashion (such as in registers) according to the C++ language rules.
+ /// This does not imply anything about how the ABI in use will actually
+ /// pass an object of this class.
+ unsigned CanPassInRegisters : 1;
+
/// \brief True if a defaulted default constructor for this class would
/// be constexpr.
unsigned DefaultedDefaultConstructorIsConstexpr : 1;
/// \brief True if this class has a constexpr default constructor.
///
/// This is true for either a user-declared constexpr default constructor
/// or an implicitly declared constexpr default constructor.
unsigned HasConstexprDefaultConstructor : 1;
/// \brief True when this class contains at least one non-static data
/// member or base class of non-literal or volatile type.
unsigned HasNonLiteralTypeFieldsOrBases : 1;
/// \brief True when visible conversion functions are already computed
/// and are available.
unsigned ComputedVisibleConversions : 1;
/// \brief Whether we have a C++11 user-provided default constructor (not
/// explicitly deleted or defaulted).
unsigned UserProvidedDefaultConstructor : 1;
/// \brief The special members which have been declared for this class,
/// either by the user or implicitly.
unsigned DeclaredSpecialMembers : 6;
/// \brief Whether an implicit copy constructor could have a const-qualified
/// parameter, for initializing virtual bases and for other subobjects.
unsigned ImplicitCopyConstructorCanHaveConstParamForVBase : 1;
unsigned ImplicitCopyConstructorCanHaveConstParamForNonVBase : 1;
/// \brief Whether an implicit copy assignment operator would have a
/// const-qualified parameter.
unsigned ImplicitCopyAssignmentHasConstParam : 1;
/// \brief Whether any declared copy constructor has a const-qualified
/// parameter.
unsigned HasDeclaredCopyConstructorWithConstParam : 1;
/// \brief Whether any declared copy assignment operator has either a
/// const-qualified reference parameter or a non-reference parameter.
unsigned HasDeclaredCopyAssignmentWithConstParam : 1;
/// \brief Whether this class describes a C++ lambda.
unsigned IsLambda : 1;
/// \brief Whether we are currently parsing base specifiers.
unsigned IsParsingBaseSpecifiers : 1;
unsigned HasODRHash : 1;
/// \brief A hash of parts of the class to help in ODR checking.
unsigned ODRHash;
/// \brief The number of base class specifiers in Bases.
unsigned NumBases;
/// \brief The number of virtual base class specifiers in VBases.
unsigned NumVBases;
/// \brief Base classes of this class.
///
/// FIXME: This is wasted space for a union.
LazyCXXBaseSpecifiersPtr Bases;
/// \brief The direct and indirect virtual base classes of this class.
LazyCXXBaseSpecifiersPtr VBases;
/// \brief The conversion functions of this C++ class (but not its
/// inherited conversion functions).
///
/// Each of the entries in this overload set is a CXXConversionDecl.
LazyASTUnresolvedSet Conversions;
/// \brief The conversion functions of this C++ class and all those
/// inherited conversion functions that are visible in this class.
///
/// Each of the entries in this overload set is a CXXConversionDecl or a
/// FunctionTemplateDecl.
LazyASTUnresolvedSet VisibleConversions;
/// \brief The declaration which defines this record.
CXXRecordDecl *Definition;
/// \brief The first friend declaration in this class, or null if there
/// aren't any.
///
/// This is actually currently stored in reverse order.
LazyDeclPtr FirstFriend;
/// \brief Retrieve the set of direct base classes.
CXXBaseSpecifier *getBases() const {
if (!Bases.isOffset())
return Bases.get(nullptr);
return getBasesSlowCase();
}
/// \brief Retrieve the set of virtual base classes.
CXXBaseSpecifier *getVBases() const {
if (!VBases.isOffset())
return VBases.get(nullptr);
return getVBasesSlowCase();
}
ArrayRef<CXXBaseSpecifier> bases() const {
return llvm::makeArrayRef(getBases(), NumBases);
}
ArrayRef<CXXBaseSpecifier> vbases() const {
return llvm::makeArrayRef(getVBases(), NumVBases);
}
private:
CXXBaseSpecifier *getBasesSlowCase() const;
CXXBaseSpecifier *getVBasesSlowCase() const;
};
struct DefinitionData *DefinitionData;
/// \brief Describes a C++ closure type (generated by a lambda expression).
struct LambdaDefinitionData : public DefinitionData {
typedef LambdaCapture Capture;
LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info,
bool Dependent, bool IsGeneric,
LambdaCaptureDefault CaptureDefault)
: DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric),
CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0),
ManglingNumber(0), ContextDecl(nullptr), Captures(nullptr),
MethodTyInfo(Info) {
IsLambda = true;
// C++1z [expr.prim.lambda]p4:
// This class type is not an aggregate type.
Aggregate = false;
PlainOldData = false;
}
/// \brief Whether this lambda is known to be dependent, even if its
/// context isn't dependent.
///
/// A lambda with a non-dependent context can be dependent if it occurs
/// within the default argument of a function template, because the
/// lambda will have been created with the enclosing context as its
/// declaration context, rather than the function. This is an unfortunate
/// artifact of having to parse the default arguments before the function
/// itself.
unsigned Dependent : 1;
/// \brief Whether this lambda is a generic lambda.
unsigned IsGenericLambda : 1;
/// \brief The Default Capture.
unsigned CaptureDefault : 2;
/// \brief The number of captures in this lambda is limited to 2^NumCaptures.
unsigned NumCaptures : 15;
/// \brief The number of explicit captures in this lambda.
unsigned NumExplicitCaptures : 13;
/// \brief The number used to indicate this lambda expression for name
/// mangling in the Itanium C++ ABI.
unsigned ManglingNumber;
/// \brief The declaration that provides context for this lambda, if the
/// actual DeclContext does not suffice. This is used for lambdas that
/// occur within default arguments of function parameters within the class
/// or within a data member initializer.
LazyDeclPtr ContextDecl;
/// \brief The list of captures, both explicit and implicit, for this
/// lambda.
Capture *Captures;
/// \brief The type of the call method.
TypeSourceInfo *MethodTyInfo;
};
struct DefinitionData *dataPtr() const {
// Complete the redecl chain (if necessary).
getMostRecentDecl();
return DefinitionData;
}
struct DefinitionData &data() const {
auto *DD = dataPtr();
assert(DD && "queried property of class with no definition");
return *DD;
}
struct LambdaDefinitionData &getLambdaData() const {
// No update required: a merged definition cannot change any lambda
// properties.
auto *DD = DefinitionData;
assert(DD && DD->IsLambda && "queried lambda property of non-lambda class");
return static_cast<LambdaDefinitionData&>(*DD);
}
/// \brief The template or declaration that this declaration
/// describes or was instantiated from, respectively.
///
/// For non-templates, this value will be null. For record
/// declarations that describe a class template, this will be a
/// pointer to a ClassTemplateDecl. For member
/// classes of class template specializations, this will be the
/// MemberSpecializationInfo referring to the member class that was
/// instantiated or specialized.
llvm::PointerUnion<ClassTemplateDecl*, MemberSpecializationInfo*>
TemplateOrInstantiation;
friend class DeclContext;
friend class LambdaExpr;
/// \brief Called from setBases and addedMember to notify the class that a
/// direct or virtual base class or a member of class type has been added.
void addedClassSubobject(CXXRecordDecl *Base);
/// \brief Notify the class that member has been added.
///
/// This routine helps maintain information about the class based on which
/// members have been added. It will be invoked by DeclContext::addDecl()
/// whenever a member is added to this record.
void addedMember(Decl *D);
void markedVirtualFunctionPure();
friend void FunctionDecl::setPure(bool);
friend class ASTNodeImporter;
/// \brief Get the head of our list of friend declarations, possibly
/// deserializing the friends from an external AST source.
FriendDecl *getFirstFriend() const;
protected:
CXXRecordDecl(Kind K, TagKind TK, const ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id, CXXRecordDecl *PrevDecl);
public:
/// \brief Iterator that traverses the base classes of a class.
typedef CXXBaseSpecifier* base_class_iterator;
/// \brief Iterator that traverses the base classes of a class.
typedef const CXXBaseSpecifier* base_class_const_iterator;
CXXRecordDecl *getCanonicalDecl() override {
return cast<CXXRecordDecl>(RecordDecl::getCanonicalDecl());
}
const CXXRecordDecl *getCanonicalDecl() const {
return const_cast<CXXRecordDecl*>(this)->getCanonicalDecl();
}
CXXRecordDecl *getPreviousDecl() {
return cast_or_null<CXXRecordDecl>(
static_cast<RecordDecl *>(this)->getPreviousDecl());
}
const CXXRecordDecl *getPreviousDecl() const {
return const_cast<CXXRecordDecl*>(this)->getPreviousDecl();
}
CXXRecordDecl *getMostRecentDecl() {
return cast<CXXRecordDecl>(
static_cast<RecordDecl *>(this)->getMostRecentDecl());
}
const CXXRecordDecl *getMostRecentDecl() const {
return const_cast<CXXRecordDecl*>(this)->getMostRecentDecl();
}
CXXRecordDecl *getDefinition() const {
// We only need an update if we don't already know which
// declaration is the definition.
auto *DD = DefinitionData ? DefinitionData : dataPtr();
return DD ? DD->Definition : nullptr;
}
bool hasDefinition() const { return DefinitionData || dataPtr(); }
static CXXRecordDecl *Create(const ASTContext &C, TagKind TK, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id,
CXXRecordDecl *PrevDecl = nullptr,
bool DelayTypeCreation = false);
static CXXRecordDecl *CreateLambda(const ASTContext &C, DeclContext *DC,
TypeSourceInfo *Info, SourceLocation Loc,
bool DependentLambda, bool IsGeneric,
LambdaCaptureDefault CaptureDefault);
static CXXRecordDecl *CreateDeserialized(const ASTContext &C, unsigned ID);
bool isDynamicClass() const {
return data().Polymorphic || data().NumVBases != 0;
}
void setIsParsingBaseSpecifiers() { data().IsParsingBaseSpecifiers = true; }
bool isParsingBaseSpecifiers() const {
return data().IsParsingBaseSpecifiers;
}
unsigned getODRHash() const;
/// \brief Sets the base classes of this struct or class.
void setBases(CXXBaseSpecifier const * const *Bases, unsigned NumBases);
/// \brief Retrieves the number of base classes of this class.
unsigned getNumBases() const { return data().NumBases; }
typedef llvm::iterator_range<base_class_iterator> base_class_range;
typedef llvm::iterator_range<base_class_const_iterator>
base_class_const_range;
base_class_range bases() {
return base_class_range(bases_begin(), bases_end());
}
base_class_const_range bases() const {
return base_class_const_range(bases_begin(), bases_end());
}
base_class_iterator bases_begin() { return data().getBases(); }
base_class_const_iterator bases_begin() const { return data().getBases(); }
base_class_iterator bases_end() { return bases_begin() + data().NumBases; }
base_class_const_iterator bases_end() const {
return bases_begin() + data().NumBases;
}
/// \brief Retrieves the number of virtual base classes of this class.
unsigned getNumVBases() const { return data().NumVBases; }
base_class_range vbases() {
return base_class_range(vbases_begin(), vbases_end());
}
base_class_const_range vbases() const {
return base_class_const_range(vbases_begin(), vbases_end());
}
base_class_iterator vbases_begin() { return data().getVBases(); }
base_class_const_iterator vbases_begin() const { return data().getVBases(); }
base_class_iterator vbases_end() { return vbases_begin() + data().NumVBases; }
base_class_const_iterator vbases_end() const {
return vbases_begin() + data().NumVBases;
}
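// Illustrative sketch, not part of the upstream header: with the ranges
// above, a base-class walk is direct. For a hypothetical defined class RD:
//   unsigned NumVirtual = 0;
//   for (const CXXBaseSpecifier &B : RD->bases())
//     if (B.isVirtual())
//       ++NumVirtual;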
/// \brief Determine whether this class has any dependent base classes which
/// are not the current instantiation.
bool hasAnyDependentBases() const;
/// Iterator access to method members. The method iterator visits
/// all method members of the class, including non-instance methods,
/// special methods, etc.
typedef specific_decl_iterator<CXXMethodDecl> method_iterator;
typedef llvm::iterator_range<specific_decl_iterator<CXXMethodDecl>>
method_range;
method_range methods() const {
return method_range(method_begin(), method_end());
}
/// \brief Method begin iterator. Iterates in the order the methods
/// were declared.
method_iterator method_begin() const {
return method_iterator(decls_begin());
}
/// \brief Method past-the-end iterator.
method_iterator method_end() const {
return method_iterator(decls_end());
}
/// Iterator access to constructor members.
typedef specific_decl_iterator<CXXConstructorDecl> ctor_iterator;
typedef llvm::iterator_range<specific_decl_iterator<CXXConstructorDecl>>
ctor_range;
ctor_range ctors() const { return ctor_range(ctor_begin(), ctor_end()); }
ctor_iterator ctor_begin() const {
return ctor_iterator(decls_begin());
}
ctor_iterator ctor_end() const {
return ctor_iterator(decls_end());
}
/// An iterator over friend declarations. All of these are defined
/// in DeclFriend.h.
class friend_iterator;
typedef llvm::iterator_range<friend_iterator> friend_range;
friend_range friends() const;
friend_iterator friend_begin() const;
friend_iterator friend_end() const;
void pushFriendDecl(FriendDecl *FD);
/// Determines whether this record has any friends.
bool hasFriends() const {
return data().FirstFriend.isValid();
}
+ /// \brief \c true if a defaulted copy constructor for this class would be
+ /// deleted.
+ bool defaultedCopyConstructorIsDeleted() const {
+ assert((!needsOverloadResolutionForCopyConstructor() ||
+ (data().DeclaredSpecialMembers & SMF_CopyConstructor)) &&
+ "this property has not yet been computed by Sema");
+ return data().DefaultedCopyConstructorIsDeleted;
+ }
+
+ /// \brief \c true if a defaulted move constructor for this class would be
+ /// deleted.
+ bool defaultedMoveConstructorIsDeleted() const {
+ assert((!needsOverloadResolutionForMoveConstructor() ||
+ (data().DeclaredSpecialMembers & SMF_MoveConstructor)) &&
+ "this property has not yet been computed by Sema");
+ return data().DefaultedMoveConstructorIsDeleted;
+ }
+
+ /// \brief \c true if a defaulted destructor for this class would be deleted.
+ bool defaultedDestructorIsDeleted() const {
+ return data().DefaultedDestructorIsDeleted;
+ }
+
/// \brief \c true if we know for sure that this class has a single,
+ /// accessible, unambiguous copy constructor that is not deleted.
+ bool hasSimpleCopyConstructor() const {
+ return !hasUserDeclaredCopyConstructor() &&
+ !data().DefaultedCopyConstructorIsDeleted;
+ }
+
+ /// \brief \c true if we know for sure that this class has a single,
/// accessible, unambiguous move constructor that is not deleted.
bool hasSimpleMoveConstructor() const {
return !hasUserDeclaredMoveConstructor() && hasMoveConstructor() &&
!data().DefaultedMoveConstructorIsDeleted;
}
+
/// \brief \c true if we know for sure that this class has a single,
/// accessible, unambiguous move assignment operator that is not deleted.
bool hasSimpleMoveAssignment() const {
return !hasUserDeclaredMoveAssignment() && hasMoveAssignment() &&
!data().DefaultedMoveAssignmentIsDeleted;
}
+
/// \brief \c true if we know for sure that this class has an accessible
/// destructor that is not deleted.
bool hasSimpleDestructor() const {
return !hasUserDeclaredDestructor() &&
!data().DefaultedDestructorIsDeleted;
}
/// \brief Determine whether this class has any default constructors.
bool hasDefaultConstructor() const {
return (data().DeclaredSpecialMembers & SMF_DefaultConstructor) ||
needsImplicitDefaultConstructor();
}
/// \brief Determine if we need to declare a default constructor for
/// this class.
///
/// This value is used for lazy creation of default constructors.
bool needsImplicitDefaultConstructor() const {
return !data().UserDeclaredConstructor &&
!(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
// C++14 [expr.prim.lambda]p20:
// The closure type associated with a lambda-expression has no
// default constructor.
!isLambda();
}
/// \brief Determine whether this class has any user-declared constructors.
///
/// When true, a default constructor will not be implicitly declared.
bool hasUserDeclaredConstructor() const {
return data().UserDeclaredConstructor;
}
/// \brief Whether this class has a user-provided default constructor
/// per C++11.
bool hasUserProvidedDefaultConstructor() const {
return data().UserProvidedDefaultConstructor;
}
/// \brief Determine whether this class has a user-declared copy constructor.
///
/// When false, a copy constructor will be implicitly declared.
bool hasUserDeclaredCopyConstructor() const {
return data().UserDeclaredSpecialMembers & SMF_CopyConstructor;
}
/// \brief Determine whether this class needs an implicit copy
/// constructor to be lazily declared.
bool needsImplicitCopyConstructor() const {
return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
}
/// \brief Determine whether we need to eagerly declare a defaulted copy
/// constructor for this class.
bool needsOverloadResolutionForCopyConstructor() const {
- return data().HasMutableFields;
+ // C++17 [class.copy.ctor]p6:
+ // If the class definition declares a move constructor or move assignment
+ // operator, the implicitly declared copy constructor is defined as
+ // deleted.
+ // In MSVC mode, sometimes a declared move assignment does not delete an
+ // implicit copy constructor, so defer this choice to Sema.
+ if (data().UserDeclaredSpecialMembers &
+ (SMF_MoveConstructor | SMF_MoveAssignment))
+ return true;
+ return data().NeedOverloadResolutionForCopyConstructor;
}
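// Illustrative note, not part of the upstream header: given
//   struct S { S(S&&); };
// the implicitly declared copy constructor S(const S&) is defined as
// deleted under the C++17 rule quoted above, which is why a user-declared
// move member forces the overload-resolution path here.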
/// \brief Determine whether an implicit copy constructor for this type
/// would have a parameter with a const-qualified reference type.
bool implicitCopyConstructorHasConstParam() const {
return data().ImplicitCopyConstructorCanHaveConstParamForNonVBase &&
(isAbstract() ||
data().ImplicitCopyConstructorCanHaveConstParamForVBase);
}
/// \brief Determine whether this class has a copy constructor with
/// a parameter type which is a reference to a const-qualified type.
bool hasCopyConstructorWithConstParam() const {
return data().HasDeclaredCopyConstructorWithConstParam ||
(needsImplicitCopyConstructor() &&
implicitCopyConstructorHasConstParam());
}
/// \brief Whether this class has a user-declared move constructor or
/// assignment operator.
///
/// When false, a move constructor and assignment operator may be
/// implicitly declared.
bool hasUserDeclaredMoveOperation() const {
return data().UserDeclaredSpecialMembers &
(SMF_MoveConstructor | SMF_MoveAssignment);
}
/// \brief Determine whether this class has had a move constructor
/// declared by the user.
bool hasUserDeclaredMoveConstructor() const {
return data().UserDeclaredSpecialMembers & SMF_MoveConstructor;
}
/// \brief Determine whether this class has a move constructor.
bool hasMoveConstructor() const {
return (data().DeclaredSpecialMembers & SMF_MoveConstructor) ||
needsImplicitMoveConstructor();
}
- /// \brief Set that we attempted to declare an implicitly move
+ /// \brief Set that we attempted to declare an implicit copy
/// constructor, but overload resolution failed so we deleted it.
+ void setImplicitCopyConstructorIsDeleted() {
+ assert((data().DefaultedCopyConstructorIsDeleted ||
+ needsOverloadResolutionForCopyConstructor()) &&
+ "Copy constructor should not be deleted");
+ data().DefaultedCopyConstructorIsDeleted = true;
+ }
+
+ /// \brief Set that we attempted to declare an implicit move
+ /// constructor, but overload resolution failed so we deleted it.
void setImplicitMoveConstructorIsDeleted() {
assert((data().DefaultedMoveConstructorIsDeleted ||
needsOverloadResolutionForMoveConstructor()) &&
"move constructor should not be deleted");
data().DefaultedMoveConstructorIsDeleted = true;
}
/// \brief Determine whether this class should get an implicit move
/// constructor or if any existing special member function inhibits this.
bool needsImplicitMoveConstructor() const {
return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
!hasUserDeclaredCopyConstructor() &&
!hasUserDeclaredCopyAssignment() &&
!hasUserDeclaredMoveAssignment() &&
!hasUserDeclaredDestructor();
}
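// Illustrative example (hypothetical type): any user-declared copy
// operation, move assignment, or destructor suppresses the implicit move
// constructor, so moves fall back to the copy constructor:
//
//   struct NoImplicitMove {
//     ~NoImplicitMove() {} // user-declared destructor
//   };                     // no implicit move constructor is declared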
/// \brief Determine whether we need to eagerly declare a defaulted move
/// constructor for this class.
bool needsOverloadResolutionForMoveConstructor() const {
return data().NeedOverloadResolutionForMoveConstructor;
}
/// \brief Determine whether this class has a user-declared copy assignment
/// operator.
///
/// When false, a copy assignment operator will be implicitly declared.
bool hasUserDeclaredCopyAssignment() const {
return data().UserDeclaredSpecialMembers & SMF_CopyAssignment;
}
/// \brief Determine whether this class needs an implicit copy
/// assignment operator to be lazily declared.
bool needsImplicitCopyAssignment() const {
return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
}
/// \brief Determine whether we need to eagerly declare a defaulted copy
/// assignment operator for this class.
bool needsOverloadResolutionForCopyAssignment() const {
return data().HasMutableFields;
}
/// \brief Determine whether an implicit copy assignment operator for this
/// type would have a parameter with a const-qualified reference type.
bool implicitCopyAssignmentHasConstParam() const {
return data().ImplicitCopyAssignmentHasConstParam;
}
/// \brief Determine whether this class has a copy assignment operator with
/// a parameter type which is a reference to a const-qualified type or is not
/// a reference.
bool hasCopyAssignmentWithConstParam() const {
return data().HasDeclaredCopyAssignmentWithConstParam ||
(needsImplicitCopyAssignment() &&
implicitCopyAssignmentHasConstParam());
}
/// \brief Determine whether this class has had a move assignment
/// declared by the user.
bool hasUserDeclaredMoveAssignment() const {
return data().UserDeclaredSpecialMembers & SMF_MoveAssignment;
}
/// \brief Determine whether this class has a move assignment operator.
bool hasMoveAssignment() const {
return (data().DeclaredSpecialMembers & SMF_MoveAssignment) ||
needsImplicitMoveAssignment();
}
/// \brief Set that we attempted to declare an implicit move assignment
/// operator, but overload resolution failed so we deleted it.
void setImplicitMoveAssignmentIsDeleted() {
assert((data().DefaultedMoveAssignmentIsDeleted ||
needsOverloadResolutionForMoveAssignment()) &&
"move assignment should not be deleted");
data().DefaultedMoveAssignmentIsDeleted = true;
}
/// \brief Determine whether this class should get an implicit move
/// assignment operator or if any existing special member function inhibits
/// this.
bool needsImplicitMoveAssignment() const {
return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
!hasUserDeclaredCopyConstructor() &&
!hasUserDeclaredCopyAssignment() &&
!hasUserDeclaredMoveConstructor() &&
!hasUserDeclaredDestructor() &&
// C++1z [expr.prim.lambda]p21: "the closure type has a deleted copy
// assignment operator". The intent is that this counts as a user
// declared copy assignment, but we do not model it that way.
!isLambda();
}
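// Illustrative example: closure types have a deleted copy assignment
// operator, so no implicit move assignment is declared for them either:
//
//   auto L = [] {};
//   auto M = L; // OK: copy construction
//   // M = L;   // ill-formed: copy assignment deleted, no move assignment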
/// \brief Determine whether we need to eagerly declare a move assignment
/// operator for this class.
bool needsOverloadResolutionForMoveAssignment() const {
return data().NeedOverloadResolutionForMoveAssignment;
}
/// \brief Determine whether this class has a user-declared destructor.
///
/// When false, a destructor will be implicitly declared.
bool hasUserDeclaredDestructor() const {
return data().UserDeclaredSpecialMembers & SMF_Destructor;
}
/// \brief Determine whether this class needs an implicit destructor to
/// be lazily declared.
bool needsImplicitDestructor() const {
return !(data().DeclaredSpecialMembers & SMF_Destructor);
}
/// \brief Determine whether we need to eagerly declare a destructor for this
/// class.
bool needsOverloadResolutionForDestructor() const {
return data().NeedOverloadResolutionForDestructor;
}
/// \brief Determine whether this class describes a lambda function object.
bool isLambda() const {
// An update record can't turn a non-lambda into a lambda.
auto *DD = DefinitionData;
return DD && DD->IsLambda;
}
/// \brief Determine whether this class describes a generic
/// lambda function object (i.e. function call operator is
/// a template).
bool isGenericLambda() const;
/// \brief Retrieve the lambda call operator of the closure type
/// if this is a closure type.
CXXMethodDecl *getLambdaCallOperator() const;
/// \brief Retrieve the lambda static invoker, the address of which
/// is returned by the conversion operator, and the body of which
/// is forwarded to the lambda call operator.
CXXMethodDecl *getLambdaStaticInvoker() const;
/// \brief Retrieve the generic lambda's template parameter list.
/// Returns null if the class does not represent a lambda or a generic
/// lambda.
TemplateParameterList *getGenericLambdaTemplateParameterList() const;
LambdaCaptureDefault getLambdaCaptureDefault() const {
assert(isLambda());
return static_cast<LambdaCaptureDefault>(getLambdaData().CaptureDefault);
}
/// \brief For a closure type, retrieve the mapping from captured
/// variables and \c this to the non-static data members that store the
/// values or references of the captures.
///
/// \param Captures Will be populated with the mapping from captured
/// variables to the corresponding fields.
///
/// \param ThisCapture Will be set to the field declaration for the
/// \c this capture.
///
/// \note No entries will be added for init-captures, as they do not capture
/// variables.
void getCaptureFields(llvm::DenseMap<const VarDecl *, FieldDecl *> &Captures,
FieldDecl *&ThisCapture) const;
typedef const LambdaCapture *capture_const_iterator;
typedef llvm::iterator_range<capture_const_iterator> capture_const_range;
capture_const_range captures() const {
return capture_const_range(captures_begin(), captures_end());
}
capture_const_iterator captures_begin() const {
return isLambda() ? getLambdaData().Captures : nullptr;
}
capture_const_iterator captures_end() const {
return isLambda() ? captures_begin() + getLambdaData().NumCaptures
: nullptr;
}
typedef UnresolvedSetIterator conversion_iterator;
conversion_iterator conversion_begin() const {
return data().Conversions.get(getASTContext()).begin();
}
conversion_iterator conversion_end() const {
return data().Conversions.get(getASTContext()).end();
}
/// Removes a conversion function from this class. The conversion
/// function must currently be a member of this class. Furthermore,
/// this class must currently be in the process of being defined.
void removeConversion(const NamedDecl *Old);
/// \brief Get all conversion functions visible in current class,
/// including conversion function templates.
llvm::iterator_range<conversion_iterator> getVisibleConversionFunctions();
/// Determine whether this class is an aggregate (C++ [dcl.init.aggr]),
/// which is a class with no user-declared constructors, no private
/// or protected non-static data members, no base classes, and no virtual
/// functions (C++ [dcl.init.aggr]p1).
bool isAggregate() const { return data().Aggregate; }
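// Illustrative example (hypothetical types):
//
//   struct Agg { int x; double y; };       // aggregate
//   struct NotAgg { NotAgg(int); int x; }; // user-declared constructor:
//                                          // not an aggregate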
/// \brief Whether this class has any in-class initializers
/// for non-static data members (including those in anonymous unions or
/// structs).
bool hasInClassInitializer() const { return data().HasInClassInitializer; }
/// \brief Whether this class or any of its subobjects has any members of
/// reference type which would make value-initialization ill-formed.
///
/// Per C++03 [dcl.init]p5:
/// - if T is a non-union class type without a user-declared constructor,
/// then every non-static data member and base-class component of T is
/// value-initialized [...] A program that calls for [...]
/// value-initialization of an entity of reference type is ill-formed.
bool hasUninitializedReferenceMember() const {
return !isUnion() && !hasUserDeclaredConstructor() &&
data().HasUninitializedReferenceMember;
}
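// Illustrative example (hypothetical type): value-initializing a class with
// an unbound reference member is ill-formed, as described above:
//
//   struct R { int &r; };
//   // R x = R(); // ill-formed: cannot value-initialize the reference 'r'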
/// \brief Whether this class is a POD-type (C++ [class]p4)
///
/// For purposes of this function a class is POD if it is an aggregate
/// that has no non-static non-POD data members, no reference data
/// members, no user-defined copy assignment operator and no
/// user-defined destructor.
///
/// Note that this is the C++ TR1 definition of POD.
bool isPOD() const { return data().PlainOldData; }
/// \brief True if this class is C-like, without C++-specific features, e.g.
/// it contains only public fields, no bases, tag kind is not 'class', etc.
bool isCLike() const;
/// \brief Determine whether this is an empty class in the sense of
/// (C++11 [meta.unary.prop]).
///
/// The CXXRecordDecl is a class type, but not a union type,
/// with no non-static data members other than bit-fields of length 0,
/// no virtual member functions, no virtual base classes,
/// and no base class B for which is_empty<B>::value is false.
///
/// \note This does NOT include a check for union-ness.
bool isEmpty() const { return data().Empty; }
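// Illustrative example (hypothetical types):
//
//   struct E {};                 // empty
//   struct E2 : E { void f(); }; // still empty: no data, no virtuals
//   struct NE { int x; };        // not empty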
/// \brief Determine whether this class has direct non-static data members.
bool hasDirectFields() const {
auto &D = data();
return D.HasPublicFields || D.HasProtectedFields || D.HasPrivateFields;
}
/// Whether this class is polymorphic (C++ [class.virtual]),
/// which means that the class contains or inherits a virtual function.
bool isPolymorphic() const { return data().Polymorphic; }
/// \brief Determine whether this class has a pure virtual function.
///
/// The class is abstract per C++ [class.abstract]p2 if it declares
/// a pure virtual function or inherits a pure virtual function that is
/// not overridden.
bool isAbstract() const { return data().Abstract; }
/// \brief Determine whether this class has standard layout per
/// (C++ [class]p7)
bool isStandardLayout() const { return data().IsStandardLayout; }
/// \brief Determine whether this class, or any of its class subobjects,
/// contains a mutable field.
bool hasMutableFields() const { return data().HasMutableFields; }
/// \brief Determine whether this class has any variant members.
bool hasVariantMembers() const { return data().HasVariantMembers; }
/// \brief Determine whether this class has a trivial default constructor
/// (C++11 [class.ctor]p5).
bool hasTrivialDefaultConstructor() const {
return hasDefaultConstructor() &&
(data().HasTrivialSpecialMembers & SMF_DefaultConstructor);
}
/// \brief Determine whether this class has a non-trivial default constructor
/// (C++11 [class.ctor]p5).
bool hasNonTrivialDefaultConstructor() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_DefaultConstructor) ||
(needsImplicitDefaultConstructor() &&
!(data().HasTrivialSpecialMembers & SMF_DefaultConstructor));
}
/// \brief Determine whether this class has at least one constexpr constructor
/// other than the copy or move constructors.
bool hasConstexprNonCopyMoveConstructor() const {
return data().HasConstexprNonCopyMoveConstructor ||
(needsImplicitDefaultConstructor() &&
defaultedDefaultConstructorIsConstexpr());
}
/// \brief Determine whether a defaulted default constructor for this class
/// would be constexpr.
bool defaultedDefaultConstructorIsConstexpr() const {
return data().DefaultedDefaultConstructorIsConstexpr &&
(!isUnion() || hasInClassInitializer() || !hasVariantMembers());
}
/// \brief Determine whether this class has a constexpr default constructor.
bool hasConstexprDefaultConstructor() const {
return data().HasConstexprDefaultConstructor ||
(needsImplicitDefaultConstructor() &&
defaultedDefaultConstructorIsConstexpr());
}
/// \brief Determine whether this class has a trivial copy constructor
/// (C++ [class.copy]p6, C++11 [class.copy]p12)
bool hasTrivialCopyConstructor() const {
return data().HasTrivialSpecialMembers & SMF_CopyConstructor;
}
/// \brief Determine whether this class has a non-trivial copy constructor
/// (C++ [class.copy]p6, C++11 [class.copy]p12)
bool hasNonTrivialCopyConstructor() const {
return data().DeclaredNonTrivialSpecialMembers & SMF_CopyConstructor ||
!hasTrivialCopyConstructor();
}
/// \brief Determine whether this class has a trivial move constructor
/// (C++11 [class.copy]p12)
bool hasTrivialMoveConstructor() const {
return hasMoveConstructor() &&
(data().HasTrivialSpecialMembers & SMF_MoveConstructor);
}
/// \brief Determine whether this class has a non-trivial move constructor
/// (C++11 [class.copy]p12)
bool hasNonTrivialMoveConstructor() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_MoveConstructor) ||
(needsImplicitMoveConstructor() &&
!(data().HasTrivialSpecialMembers & SMF_MoveConstructor));
}
/// \brief Determine whether this class has a trivial copy assignment operator
/// (C++ [class.copy]p11, C++11 [class.copy]p25)
bool hasTrivialCopyAssignment() const {
return data().HasTrivialSpecialMembers & SMF_CopyAssignment;
}
/// \brief Determine whether this class has a non-trivial copy assignment
/// operator (C++ [class.copy]p11, C++11 [class.copy]p25)
bool hasNonTrivialCopyAssignment() const {
return data().DeclaredNonTrivialSpecialMembers & SMF_CopyAssignment ||
!hasTrivialCopyAssignment();
}
/// \brief Determine whether this class has a trivial move assignment operator
/// (C++11 [class.copy]p25)
bool hasTrivialMoveAssignment() const {
return hasMoveAssignment() &&
(data().HasTrivialSpecialMembers & SMF_MoveAssignment);
}
/// \brief Determine whether this class has a non-trivial move assignment
/// operator (C++11 [class.copy]p25)
bool hasNonTrivialMoveAssignment() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_MoveAssignment) ||
(needsImplicitMoveAssignment() &&
!(data().HasTrivialSpecialMembers & SMF_MoveAssignment));
}
/// \brief Determine whether this class has a trivial destructor
/// (C++ [class.dtor]p3)
bool hasTrivialDestructor() const {
return data().HasTrivialSpecialMembers & SMF_Destructor;
}
/// \brief Determine whether this class has a non-trivial destructor
/// (C++ [class.dtor]p3)
bool hasNonTrivialDestructor() const {
return !(data().HasTrivialSpecialMembers & SMF_Destructor);
}
/// \brief Determine whether declaring a const variable with this type is ok
/// per core issue 253.
bool allowConstDefaultInit() const {
return !data().HasUninitializedFields ||
!(data().HasDefaultedDefaultConstructor ||
needsImplicitDefaultConstructor());
}
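// Illustrative example (hypothetical types) of the core issue 253 rule:
//
//   struct A { int n = 0; };
//   const A a; // OK: no field is left uninitialized
//   struct B { int n; };
//   // const B b; // ill-formed: 'n' would be left uninitialized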
/// \brief Determine whether this class has a destructor which has no
/// semantic effect.
///
/// Any such destructor will be trivial, public, defaulted and not deleted,
/// and will call only irrelevant destructors.
bool hasIrrelevantDestructor() const {
return data().HasIrrelevantDestructor;
+ }
+
+ /// \brief Determine whether this class has at least one trivial, non-deleted
+ /// copy or move constructor.
+ bool canPassInRegisters() const {
+ return data().CanPassInRegisters;
+ }
+
+ /// \brief Set that we can pass this RecordDecl in registers.
+ // FIXME: This should be set as part of completeDefinition.
+ void setCanPassInRegisters(bool CanPass) {
+ data().CanPassInRegisters = CanPass;
}
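// Illustrative example (hypothetical types): a type with a trivial copy
// constructor can typically be passed in registers, while a non-trivial
// copy constructor usually forces an indirect (in-memory) pass, depending
// on the target ABI:
//
//   struct Small { int x, y; };                   // may pass in registers
//   struct Tracked { Tracked(const Tracked &); }; // passed indirectly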
/// \brief Determine whether this class has a non-literal or volatile type
/// non-static data member or base class.
bool hasNonLiteralTypeFieldsOrBases() const {
return data().HasNonLiteralTypeFieldsOrBases;
}
/// \brief Determine whether this class has a using-declaration that names
/// a user-declared base class constructor.
bool hasInheritedConstructor() const {
return data().HasInheritedConstructor;
}
/// \brief Determine whether this class has a using-declaration that names
/// a base class assignment operator.
bool hasInheritedAssignment() const {
return data().HasInheritedAssignment;
}
/// \brief Determine whether this class is considered trivially copyable per
/// (C++11 [class]p6).
bool isTriviallyCopyable() const;
/// \brief Determine whether this class is considered trivial.
///
/// C++11 [class]p6:
/// "A trivial class is a class that has a trivial default constructor and
/// is trivially copyable."
bool isTrivial() const {
return isTriviallyCopyable() && hasTrivialDefaultConstructor();
}
/// \brief Determine whether this class is a literal type.
///
/// C++11 [basic.types]p10:
/// A class type that has all the following properties:
/// - it has a trivial destructor
/// - every constructor call and full-expression in the
/// brace-or-equal-initializers for non-static data members (if any) is
/// a constant expression.
/// - it is an aggregate type or has at least one constexpr constructor
/// or constructor template that is not a copy or move constructor, and
/// - all of its non-static data members and base classes are of literal
/// types
///
/// We resolve DR1361 by ignoring the second bullet. We resolve DR1452 by
/// treating types with trivial default constructors as literal types.
///
/// Only in C++1z and beyond are lambdas literal types.
bool isLiteral() const {
return hasTrivialDestructor() &&
(!isLambda() || getASTContext().getLangOpts().CPlusPlus1z) &&
!hasNonLiteralTypeFieldsOrBases() &&
(isAggregate() || isLambda() ||
hasConstexprNonCopyMoveConstructor() ||
hasTrivialDefaultConstructor());
}
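// Illustrative example (hypothetical types):
//
//   struct Lit { int x; constexpr Lit(int v) : x(v) {} }; // literal type
//   struct NonLit { ~NonLit() {} }; // non-trivial destructor: not literal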
/// \brief If this record is an instantiation of a member class,
/// retrieves the member class from which it was instantiated.
///
/// This routine will return non-null for (non-templated) member
/// classes of class templates. For example, given:
///
/// \code
/// template<typename T>
/// struct X {
/// struct A { };
/// };
/// \endcode
///
/// The declaration for X<int>::A is a (non-templated) CXXRecordDecl
/// whose parent is the class template specialization X<int>. For
/// this declaration, getInstantiatedFromMemberClass() will return
/// the CXXRecordDecl X<T>::A. When a complete definition of
/// X<int>::A is required, it will be instantiated from the
/// declaration returned by getInstantiatedFromMemberClass().
CXXRecordDecl *getInstantiatedFromMemberClass() const;
/// \brief If this class is an instantiation of a member class of a
/// class template specialization, retrieves the member specialization
/// information.
MemberSpecializationInfo *getMemberSpecializationInfo() const;
/// \brief Specify that this record is an instantiation of the
/// member class \p RD.
void setInstantiationOfMemberClass(CXXRecordDecl *RD,
TemplateSpecializationKind TSK);
/// \brief Retrieves the class template that is described by this
/// class declaration.
///
/// Every class template is represented as a ClassTemplateDecl and a
/// CXXRecordDecl. The former contains template properties (such as
/// the template parameter lists) while the latter contains the
/// actual description of the template's
/// contents. ClassTemplateDecl::getTemplatedDecl() retrieves the
/// CXXRecordDecl from a ClassTemplateDecl, while
/// getDescribedClassTemplate() retrieves the ClassTemplateDecl from
/// a CXXRecordDecl.
ClassTemplateDecl *getDescribedClassTemplate() const;
void setDescribedClassTemplate(ClassTemplateDecl *Template);
/// \brief Determine whether this particular class is a specialization or
/// instantiation of a class template or member class of a class template,
/// and how it was instantiated or specialized.
TemplateSpecializationKind getTemplateSpecializationKind() const;
/// \brief Set the kind of specialization or template instantiation this is.
void setTemplateSpecializationKind(TemplateSpecializationKind TSK);
/// \brief Retrieve the record declaration from which this record could be
/// instantiated. Returns null if this class is not a template instantiation.
const CXXRecordDecl *getTemplateInstantiationPattern() const;
CXXRecordDecl *getTemplateInstantiationPattern() {
return const_cast<CXXRecordDecl *>(const_cast<const CXXRecordDecl *>(this)
->getTemplateInstantiationPattern());
}
/// \brief Returns the destructor decl for this class.
CXXDestructorDecl *getDestructor() const;
/// \brief Returns true if the class destructor, or any implicitly invoked
/// destructor, is marked noreturn.
bool isAnyDestructorNoReturn() const;
/// \brief If the class is a local class [class.local], returns
/// the enclosing function declaration.
const FunctionDecl *isLocalClass() const {
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(getDeclContext()))
return RD->isLocalClass();
return dyn_cast<FunctionDecl>(getDeclContext());
}
FunctionDecl *isLocalClass() {
return const_cast<FunctionDecl*>(
const_cast<const CXXRecordDecl*>(this)->isLocalClass());
}
/// \brief Determine whether this dependent class is a current instantiation,
/// when viewed from within the given context.
bool isCurrentInstantiation(const DeclContext *CurContext) const;
/// \brief Determine whether this class is derived from the class \p Base.
///
/// This routine only determines whether this class is derived from \p Base,
/// but does not account for factors that may make a Derived -> Base class
/// ill-formed, such as private/protected inheritance or multiple, ambiguous
/// base class subobjects.
///
/// \param Base the base class we are searching for.
///
/// \returns true if this class is derived from Base, false otherwise.
bool isDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Determine whether this class is derived from the type \p Base.
///
/// This routine only determines whether this class is derived from \p Base,
/// but does not account for factors that may make a Derived -> Base class
/// ill-formed, such as private/protected inheritance or multiple, ambiguous
/// base class subobjects.
///
/// \param Base the base class we are searching for.
///
/// \param Paths will contain the paths taken from the current class to the
/// given \p Base class.
///
/// \returns true if this class is derived from \p Base, false otherwise.
///
/// \todo add a separate parameter to configure IsDerivedFrom, rather than
/// tangling input and output in \p Paths
bool isDerivedFrom(const CXXRecordDecl *Base, CXXBasePaths &Paths) const;
/// \brief Determine whether this class is virtually derived from
/// the class \p Base.
///
/// This routine only determines whether this class is virtually
/// derived from \p Base, but does not account for factors that may
/// make a Derived -> Base class ill-formed, such as
/// private/protected inheritance or multiple, ambiguous base class
/// subobjects.
///
/// \param Base the base class we are searching for.
///
/// \returns true if this class is virtually derived from Base,
/// false otherwise.
bool isVirtuallyDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Determine whether this class is provably not derived from
/// the type \p Base.
bool isProvablyNotDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Function type used by forallBases() as a callback.
///
/// \param BaseDefinition the definition of the base class
///
/// \returns true if this base matched the search criteria
typedef llvm::function_ref<bool(const CXXRecordDecl *BaseDefinition)>
ForallBasesCallback;
/// \brief Determines if the given callback holds for all the direct
/// or indirect base classes of this type.
///
/// The class itself does not count as a base class. This routine
/// returns false if the class has non-computable base classes.
///
/// \param BaseMatches Callback invoked for each (direct or indirect) base
/// class of this type, or if \p AllowShortCircuit is true then until a call
/// returns false.
///
/// \param AllowShortCircuit if false, forces the callback to be called
/// for every base class, even if a dependent or non-matching base was
/// found.
bool forallBases(ForallBasesCallback BaseMatches,
bool AllowShortCircuit = true) const;
/// \brief Function type used by lookupInBases() to determine whether a
/// specific base class subobject matches the lookup criteria.
///
/// \param Specifier the base-class specifier that describes the inheritance
/// from the base class we are trying to match.
///
/// \param Path the current path, from the most-derived class down to the
/// base named by the \p Specifier.
///
/// \returns true if this base matched the search criteria, false otherwise.
typedef llvm::function_ref<bool(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path)> BaseMatchesCallback;
/// \brief Look for entities within the base classes of this C++ class,
/// transitively searching all base class subobjects.
///
/// This routine uses the callback function \p BaseMatches to find base
/// classes meeting some search criteria, walking all base class subobjects
/// and populating the given \p Paths structure with the paths through the
/// inheritance hierarchy that resulted in a match. On a successful search,
/// the \p Paths structure can be queried to retrieve the matching paths and
/// to determine if there were any ambiguities.
///
/// \param BaseMatches callback function used to determine whether a given
/// base matches the user-defined search criteria.
///
/// \param Paths used to record the paths from this class to its base class
/// subobjects that match the search criteria.
///
/// \param LookupInDependent can be set to true to extend the search to
/// dependent base classes.
///
/// \returns true if there exists any path from this class to a base class
/// subobject that matches the search criteria.
bool lookupInBases(BaseMatchesCallback BaseMatches, CXXBasePaths &Paths,
bool LookupInDependent = false) const;
/// \brief Base-class lookup callback that determines whether the given
/// base class specifier refers to a specific class declaration.
///
/// This callback can be used with \c lookupInBases() to determine whether
/// a given derived class has a base class subobject of a particular type.
/// The base record pointer should refer to the canonical CXXRecordDecl of the
/// base class that we are searching for.
static bool FindBaseClass(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, const CXXRecordDecl *BaseRecord);
/// \brief Base-class lookup callback that determines whether the
/// given base class specifier refers to a specific class
/// declaration and describes virtual derivation.
///
/// This callback can be used with \c lookupInBases() to determine
/// whether a given derived class has a virtual base class
/// subobject of a particular type. The base record pointer should
/// refer to the canonical CXXRecordDecl of the base class that we
/// are searching for.
static bool FindVirtualBaseClass(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path,
const CXXRecordDecl *BaseRecord);
/// \brief Base-class lookup callback that determines whether there exists
/// a tag with the given name.
///
/// This callback can be used with \c lookupInBases() to find tag members
/// of the given name within a C++ class hierarchy.
static bool FindTagMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy.
static bool FindOrdinaryMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy, including dependent
/// classes.
static bool
FindOrdinaryMemberInDependentClasses(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// an OpenMP declare reduction member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy.
static bool FindOMPReductionMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name that can be used in a nested-name-specifier.
///
/// This callback can be used with \c lookupInBases() to find members of
/// the given name within a C++ class hierarchy that can occur within
/// nested-name-specifiers.
static bool FindNestedNameSpecifierMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path,
DeclarationName Name);
/// \brief Retrieve the final overriders for each virtual member
/// function in the class hierarchy where this class is the
/// most-derived class in the class hierarchy.
void getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const;
/// \brief Get the indirect primary bases for this class.
void getIndirectPrimaryBases(CXXIndirectPrimaryBaseSet& Bases) const;
/// Performs an imprecise lookup of a dependent name in this class.
///
/// This function does not follow strict semantic rules and should be used
/// only when lookup rules can be relaxed, e.g. indexing.
std::vector<const NamedDecl *>
lookupDependentName(const DeclarationName &Name,
llvm::function_ref<bool(const NamedDecl *ND)> Filter);
/// Renders and displays an inheritance diagram
/// for this C++ class and all of its base classes (transitively) using
/// GraphViz.
void viewInheritance(ASTContext& Context) const;
/// \brief Calculates the access of a decl that is reached
/// along a path.
static AccessSpecifier MergeAccess(AccessSpecifier PathAccess,
AccessSpecifier DeclAccess) {
assert(DeclAccess != AS_none);
if (DeclAccess == AS_private) return AS_none;
return (PathAccess > DeclAccess ? PathAccess : DeclAccess);
}
/// \brief Indicates that the declaration of a defaulted or deleted special
/// member function is now complete.
void finishedDefaultedOrDeletedMember(CXXMethodDecl *MD);
/// \brief Indicates that the definition of this class is now complete.
void completeDefinition() override;
/// \brief Indicates that the definition of this class is now complete,
/// and provides a final overrider map to help determine whether the class
/// is abstract.
///
/// \param FinalOverriders The final overrider map for this class, which can
/// be provided as an optimization for abstract-class checking. If NULL,
/// final overriders will be computed if they are needed to complete the
/// definition.
void completeDefinition(CXXFinalOverriderMap *FinalOverriders);
/// \brief Determine whether this class may end up being abstract, even though
/// it is not yet known to be abstract.
///
/// \returns true if this class is not known to be abstract but has any
/// base classes that are abstract. In this case, \c completeDefinition()
/// will need to compute final overriders to determine whether the class is
/// actually abstract.
bool mayBeAbstract() const;
/// \brief If this is the closure type of a lambda expression, retrieve the
/// number to be used for name mangling in the Itanium C++ ABI.
///
/// Zero indicates that this closure type has internal linkage, so the
/// mangling number does not matter, while a non-zero value indicates which
/// lambda expression this is in this particular context.
unsigned getLambdaManglingNumber() const {
assert(isLambda() && "Not a lambda closure type!");
return getLambdaData().ManglingNumber;
}
/// \brief Retrieve the declaration that provides additional context for a
/// lambda, when the normal declaration context is not specific enough.
///
/// Certain contexts (default arguments of in-class function parameters and
/// the initializers of data members) have separate name mangling rules for
/// lambdas within the Itanium C++ ABI. For these cases, this routine provides
/// the declaration in which the lambda occurs, e.g., the function parameter
/// or the non-static data member. Otherwise, it returns NULL to imply that
/// the declaration context suffices.
Decl *getLambdaContextDecl() const;
/// \brief Set the mangling number and context declaration for a lambda
/// class.
void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl) {
getLambdaData().ManglingNumber = ManglingNumber;
getLambdaData().ContextDecl = ContextDecl;
}
/// \brief Returns the inheritance model used for this record.
MSInheritanceAttr::Spelling getMSInheritanceModel() const;
/// \brief Calculate what the inheritance model would be for this class.
MSInheritanceAttr::Spelling calculateInheritanceModel() const;
/// In the Microsoft C++ ABI, use zero for the field offset of a null data
/// member pointer if we can guarantee that zero is not a valid field offset,
/// or if the member pointer has multiple fields. Polymorphic classes have a
/// vfptr at offset zero, so we can use zero for null. If there are multiple
/// fields, we can use zero even if it is a valid field offset because
/// null-ness testing will check the other fields.
bool nullFieldOffsetIsZero() const {
return !MSInheritanceAttr::hasOnlyOneField(/*IsMemberFunction=*/false,
getMSInheritanceModel()) ||
(hasDefinition() && isPolymorphic());
}
/// \brief Controls when vtordisps will be emitted if this record is used as a
/// virtual base.
MSVtorDispAttr::Mode getMSVtorDispMode() const;
/// \brief Determine whether this lambda expression was known to be dependent
/// at the time it was created, even if its context does not appear to be
/// dependent.
///
/// This flag is a workaround for an issue with parsing, where default
/// arguments are parsed before their enclosing function declarations have
/// been created. This means that any lambda expressions within those
/// default arguments will have as their DeclContext the context enclosing
/// the function declaration, which may be non-dependent even when the
/// function declaration itself is dependent. This flag indicates when we
/// know that the lambda is dependent despite that.
bool isDependentLambda() const {
return isLambda() && getLambdaData().Dependent;
}
TypeSourceInfo *getLambdaTypeInfo() const {
return getLambdaData().MethodTyInfo;
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstCXXRecord && K <= lastCXXRecord;
}
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend class ASTRecordWriter;
friend class ASTReader;
friend class ASTWriter;
};
/// \brief Represents a C++ deduction guide declaration.
///
/// \code
/// template<typename T> struct A { A(); A(T); };
/// A() -> A<int>;
/// \endcode
///
/// In this example, there will be an explicit deduction guide from the
/// second line, and implicit deduction guide templates synthesized from
/// the constructors of \c A.
class CXXDeductionGuideDecl : public FunctionDecl {
void anchor() override;
private:
CXXDeductionGuideDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
bool IsExplicit, const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation)
: FunctionDecl(CXXDeductionGuide, C, DC, StartLoc, NameInfo, T, TInfo,
SC_None, false, false) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
IsExplicitSpecified = IsExplicit;
}
public:
static CXXDeductionGuideDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, bool IsExplicit,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation);
static CXXDeductionGuideDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Whether this deduction guide is explicit.
bool isExplicit() const { return IsExplicitSpecified; }
/// Whether this deduction guide was declared with the 'explicit' specifier.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Get the template for which this guide performs deduction.
TemplateDecl *getDeducedTemplate() const {
return getDeclName().getCXXDeductionGuideTemplate();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXDeductionGuide; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
/// \brief Represents a static or instance method of a struct/union/class.
///
/// In the terminology of the C++ Standard, these are the (static and
/// non-static) member functions, whether virtual or not.
class CXXMethodDecl : public FunctionDecl {
void anchor() override;
protected:
CXXMethodDecl(Kind DK, ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc, const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC, bool isInline,
bool isConstexpr, SourceLocation EndLocation)
: FunctionDecl(DK, C, RD, StartLoc, NameInfo, T, TInfo,
SC, isInline, isConstexpr) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
}
public:
static CXXMethodDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC,
bool isInline,
bool isConstexpr,
SourceLocation EndLocation);
static CXXMethodDecl *CreateDeserialized(ASTContext &C, unsigned ID);
bool isStatic() const;
bool isInstance() const { return !isStatic(); }
/// Returns true if the given operator is implicitly static in a record
/// context.
static bool isStaticOverloadedOperator(OverloadedOperatorKind OOK) {
// [class.free]p1:
// Any allocation function for a class T is a static member
// (even if not explicitly declared static).
// [class.free]p6 Any deallocation function for a class X is a static member
// (even if not explicitly declared static).
return OOK == OO_New || OOK == OO_Array_New || OOK == OO_Delete ||
OOK == OO_Array_Delete;
}
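// Illustrative example (hypothetical type): class-scope allocation and
// deallocation functions are static members even without the keyword:
//
//   struct S {
//     void *operator new(std::size_t); // implicitly static
//     void operator delete(void *);    // implicitly static
//   };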
bool isConst() const { return getType()->castAs<FunctionType>()->isConst(); }
bool isVolatile() const { return getType()->castAs<FunctionType>()->isVolatile(); }
bool isVirtual() const {
CXXMethodDecl *CD =
cast<CXXMethodDecl>(const_cast<CXXMethodDecl*>(this)->getCanonicalDecl());
// Member function is virtual if it is marked explicitly so, or if it is
// declared in __interface -- then it is automatically pure virtual.
if (CD->isVirtualAsWritten() || CD->isPure())
return true;
return (CD->begin_overridden_methods() != CD->end_overridden_methods());
}
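// Illustrative example (hypothetical types): a member function is virtual
// if it overrides one, even without the 'virtual' keyword:
//
//   struct Base { virtual void f(); };
//   struct Derived : Base { void f(); }; // virtual: overrides Base::f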
/// If it's possible to devirtualize a call to this method, return the called
/// function. Otherwise, return null.
/// \param Base The object on which this virtual function is called.
/// \param IsAppleKext True if we are compiling for Apple kext.
CXXMethodDecl *getDevirtualizedMethod(const Expr *Base, bool IsAppleKext);
const CXXMethodDecl *getDevirtualizedMethod(const Expr *Base,
bool IsAppleKext) const {
return const_cast<CXXMethodDecl *>(this)->getDevirtualizedMethod(
Base, IsAppleKext);
}
/// \brief Determine whether this is a usual deallocation function
/// (C++ [basic.stc.dynamic.deallocation]p2), which is an overloaded
/// delete or delete[] operator with a particular signature.
bool isUsualDeallocationFunction() const;
/// \brief Determine whether this is a copy-assignment operator, regardless
/// of whether it was declared implicitly or explicitly.
bool isCopyAssignmentOperator() const;
/// \brief Determine whether this is a move assignment operator.
bool isMoveAssignmentOperator() const;
CXXMethodDecl *getCanonicalDecl() override {
return cast<CXXMethodDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXMethodDecl *getCanonicalDecl() const {
return const_cast<CXXMethodDecl*>(this)->getCanonicalDecl();
}
CXXMethodDecl *getMostRecentDecl() {
return cast<CXXMethodDecl>(
static_cast<FunctionDecl *>(this)->getMostRecentDecl());
}
const CXXMethodDecl *getMostRecentDecl() const {
return const_cast<CXXMethodDecl*>(this)->getMostRecentDecl();
}
/// True if this method is user-declared and was not
/// deleted or defaulted on its first declaration.
bool isUserProvided() const {
return !(isDeleted() || getCanonicalDecl()->isDefaulted());
}
/// \brief Record that this method overrides the given method \p MD.
void addOverriddenMethod(const CXXMethodDecl *MD);
typedef const CXXMethodDecl *const* method_iterator;
method_iterator begin_overridden_methods() const;
method_iterator end_overridden_methods() const;
unsigned size_overridden_methods() const;
typedef ASTContext::overridden_method_range overridden_method_range;
overridden_method_range overridden_methods() const;
/// Returns the parent of this method declaration, which
/// is the class in which this method is defined.
const CXXRecordDecl *getParent() const {
return cast<CXXRecordDecl>(FunctionDecl::getParent());
}
/// Returns the parent of this method declaration, which
/// is the class in which this method is defined.
CXXRecordDecl *getParent() {
return const_cast<CXXRecordDecl *>(
cast<CXXRecordDecl>(FunctionDecl::getParent()));
}
/// \brief Returns the type of the \c this pointer.
///
/// Should only be called for instance (i.e., non-static) methods.
QualType getThisType(ASTContext &C) const;
unsigned getTypeQualifiers() const {
return getType()->getAs<FunctionProtoType>()->getTypeQuals();
}
/// \brief Retrieve the ref-qualifier associated with this method.
///
/// In the following example, \c f() has an lvalue ref-qualifier, \c g()
/// has an rvalue ref-qualifier, and \c h() has no ref-qualifier.
/// @code
/// struct X {
/// void f() &;
/// void g() &&;
/// void h();
/// };
/// @endcode
RefQualifierKind getRefQualifier() const {
return getType()->getAs<FunctionProtoType>()->getRefQualifier();
}
bool hasInlineBody() const;
/// \brief Determine whether this is a lambda closure type's static member
/// function that is used for the result of the lambda's conversion to
/// function pointer (for a lambda with no captures).
///
/// The function itself, if used, will have a placeholder body that will be
/// supplied by IR generation to either forward to the function call operator
/// or clone the function call operator.
bool isLambdaStaticInvoker() const;
/// \brief Find the method in \p RD that corresponds to this one.
///
/// Find if \p RD or one of the classes it inherits from overrides this method.
/// If so, return it. \p RD is assumed to be a subclass of the class defining
/// this method (or be the class itself), unless \p MayBeBase is set to true.
CXXMethodDecl *
getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase = false);
const CXXMethodDecl *
getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase = false) const {
return const_cast<CXXMethodDecl *>(this)
->getCorrespondingMethodInClass(RD, MayBeBase);
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstCXXMethod && K <= lastCXXMethod;
}
};
/// \brief Represents a C++ base or member initializer.
///
/// This is part of a constructor initializer that
/// initializes one non-static member variable or one base class. For
/// example, in the following, both 'A(a)' and 'f(3.14159)' are member
/// initializers:
///
/// \code
/// class A { };
/// class B : public A {
/// float f;
/// public:
/// B(A& a) : A(a), f(3.14159) { }
/// };
/// \endcode
class CXXCtorInitializer final {
/// \brief Either the base class name/delegating constructor type (stored as
/// a TypeSourceInfo*), a normal field (FieldDecl), or an anonymous field
/// (IndirectFieldDecl*) being initialized.
llvm::PointerUnion3<TypeSourceInfo *, FieldDecl *, IndirectFieldDecl *>
Initializee;
/// \brief The source location for the field name or, for a base initializer
/// pack expansion, the location of the ellipsis.
///
/// In the case of a delegating
/// constructor, it will still include the type's source location as the
/// Initializee points to the CXXConstructorDecl (to allow loop detection).
SourceLocation MemberOrEllipsisLocation;
/// \brief The argument used to initialize the base or member, which may
/// end up constructing an object (when multiple arguments are involved).
Stmt *Init;
/// \brief Location of the left paren of the ctor-initializer.
SourceLocation LParenLoc;
/// \brief Location of the right paren of the ctor-initializer.
SourceLocation RParenLoc;
/// \brief If the initializee is a type, whether that type makes this
/// a delegating initialization.
unsigned IsDelegating : 1;
/// \brief If the initializer is a base initializer, this keeps track
/// of whether the base is virtual or not.
unsigned IsVirtual : 1;
/// \brief Whether or not the initializer is explicitly written
/// in the sources.
unsigned IsWritten : 1;
/// If IsWritten is true, then this number keeps track of the textual order
/// of this initializer in the original sources, counting from 0.
unsigned SourceOrder : 13;
public:
/// \brief Creates a new base-class initializer.
explicit
CXXCtorInitializer(ASTContext &Context, TypeSourceInfo *TInfo, bool IsVirtual,
SourceLocation L, Expr *Init, SourceLocation R,
SourceLocation EllipsisLoc);
/// \brief Creates a new member initializer.
explicit
CXXCtorInitializer(ASTContext &Context, FieldDecl *Member,
SourceLocation MemberLoc, SourceLocation L, Expr *Init,
SourceLocation R);
/// \brief Creates a new anonymous field initializer.
explicit
CXXCtorInitializer(ASTContext &Context, IndirectFieldDecl *Member,
SourceLocation MemberLoc, SourceLocation L, Expr *Init,
SourceLocation R);
/// \brief Creates a new delegating initializer.
explicit
CXXCtorInitializer(ASTContext &Context, TypeSourceInfo *TInfo,
SourceLocation L, Expr *Init, SourceLocation R);
/// \brief Determine whether this initializer is initializing a base class.
bool isBaseInitializer() const {
return Initializee.is<TypeSourceInfo*>() && !IsDelegating;
}
/// \brief Determine whether this initializer is initializing a non-static
/// data member.
bool isMemberInitializer() const { return Initializee.is<FieldDecl*>(); }
bool isAnyMemberInitializer() const {
return isMemberInitializer() || isIndirectMemberInitializer();
}
bool isIndirectMemberInitializer() const {
return Initializee.is<IndirectFieldDecl*>();
}
/// \brief Determine whether this initializer is an implicit initializer
/// generated for a field with an initializer defined on the member
/// declaration.
///
/// In-class member initializers (also known as "non-static data member
/// initializations", NSDMIs) were introduced in C++11.
bool isInClassMemberInitializer() const {
return Init->getStmtClass() == Stmt::CXXDefaultInitExprClass;
}
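// Illustrative example (hypothetical type): the implicit initialization of
// 'x' in B() below is modeled as a CXXDefaultInitExpr initializer:
//
//   struct B {
//     int x = 42; // in-class (NSDMI) initializer
//     B() {}      // 'x' is initialized from the NSDMI
//   };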
/// \brief Determine whether this initializer is creating a delegating
/// constructor.
bool isDelegatingInitializer() const {
return Initializee.is<TypeSourceInfo*>() && IsDelegating;
}
/// \brief Determine whether this initializer is a pack expansion.
bool isPackExpansion() const {
return isBaseInitializer() && MemberOrEllipsisLocation.isValid();
}
/// \brief For a pack expansion, returns the location of the ellipsis.
SourceLocation getEllipsisLoc() const {
assert(isPackExpansion() && "Initializer is not a pack expansion");
return MemberOrEllipsisLocation;
}
/// If this is a base class initializer, returns the type of the
/// base class with location information. Otherwise, returns a null
/// type location.
TypeLoc getBaseClassLoc() const;
/// If this is a base class initializer, returns the type of the base class.
/// Otherwise, returns null.
const Type *getBaseClass() const;
/// Returns whether the base is virtual or not.
bool isBaseVirtual() const {
assert(isBaseInitializer() && "Must call this on base initializer!");
return IsVirtual;
}
/// \brief Returns the declarator information for a base class or delegating
/// initializer.
TypeSourceInfo *getTypeSourceInfo() const {
return Initializee.dyn_cast<TypeSourceInfo *>();
}
/// \brief If this is a member initializer, returns the declaration of the
/// non-static data member being initialized. Otherwise, returns null.
FieldDecl *getMember() const {
if (isMemberInitializer())
return Initializee.get<FieldDecl*>();
return nullptr;
}
FieldDecl *getAnyMember() const {
if (isMemberInitializer())
return Initializee.get<FieldDecl*>();
if (isIndirectMemberInitializer())
return Initializee.get<IndirectFieldDecl*>()->getAnonField();
return nullptr;
}
IndirectFieldDecl *getIndirectMember() const {
if (isIndirectMemberInitializer())
return Initializee.get<IndirectFieldDecl*>();
return nullptr;
}
SourceLocation getMemberLocation() const {
return MemberOrEllipsisLocation;
}
/// \brief Determine the source location of the initializer.
SourceLocation getSourceLocation() const;
/// \brief Determine the source range covering the entire initializer.
SourceRange getSourceRange() const LLVM_READONLY;
/// \brief Determine whether this initializer is explicitly written
/// in the source code.
bool isWritten() const { return IsWritten; }
/// \brief Return the source position of the initializer, counting from 0.
/// If the initializer was implicit, -1 is returned.
int getSourceOrder() const {
return IsWritten ? static_cast<int>(SourceOrder) : -1;
}
/// \brief Set the source order of this initializer.
///
/// This can only be called once for each initializer; it cannot be called
/// on an initializer having a positive number of (implicit) array indices.
///
/// This assumes that the initializer was written in the source code, and
/// ensures that isWritten() returns true.
void setSourceOrder(int Pos) {
assert(!IsWritten &&
"setSourceOrder() used on implicit initializer");
assert(SourceOrder == 0 &&
"calling twice setSourceOrder() on the same initializer");
assert(Pos >= 0 &&
"setSourceOrder() used to make an initializer implicit");
IsWritten = true;
SourceOrder = static_cast<unsigned>(Pos);
}
SourceLocation getLParenLoc() const { return LParenLoc; }
SourceLocation getRParenLoc() const { return RParenLoc; }
/// \brief Get the initializer.
Expr *getInit() const { return static_cast<Expr*>(Init); }
};
/// Description of a constructor that was inherited from a base class.
class InheritedConstructor {
ConstructorUsingShadowDecl *Shadow;
CXXConstructorDecl *BaseCtor;
public:
InheritedConstructor() : Shadow(), BaseCtor() {}
InheritedConstructor(ConstructorUsingShadowDecl *Shadow,
CXXConstructorDecl *BaseCtor)
: Shadow(Shadow), BaseCtor(BaseCtor) {}
explicit operator bool() const { return Shadow; }
ConstructorUsingShadowDecl *getShadowDecl() const { return Shadow; }
CXXConstructorDecl *getConstructor() const { return BaseCtor; }
};
/// \brief Represents a C++ constructor within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// explicit X(int); // represented by a CXXConstructorDecl.
/// };
/// \endcode
class CXXConstructorDecl final
: public CXXMethodDecl,
private llvm::TrailingObjects<CXXConstructorDecl, InheritedConstructor> {
void anchor() override;
/// \name Support for base and member initializers.
/// \{
/// \brief The arguments used to initialize the base or member.
LazyCXXCtorInitializersPtr CtorInitializers;
unsigned NumCtorInitializers : 31;
/// \}
/// \brief Whether this constructor declaration is an implicitly-declared
/// inheriting constructor.
unsigned IsInheritingConstructor : 1;
CXXConstructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isExplicitSpecified, bool isInline,
bool isImplicitlyDeclared, bool isConstexpr,
InheritedConstructor Inherited)
: CXXMethodDecl(CXXConstructor, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, isConstexpr, SourceLocation()),
CtorInitializers(nullptr), NumCtorInitializers(0),
IsInheritingConstructor((bool)Inherited) {
setImplicit(isImplicitlyDeclared);
if (Inherited)
*getTrailingObjects<InheritedConstructor>() = Inherited;
IsExplicitSpecified = isExplicitSpecified;
}
public:
static CXXConstructorDecl *CreateDeserialized(ASTContext &C, unsigned ID,
bool InheritsConstructor);
static CXXConstructorDecl *
Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
bool isExplicit, bool isInline, bool isImplicitlyDeclared,
bool isConstexpr,
InheritedConstructor Inherited = InheritedConstructor());
/// \brief Iterates through the member/base initializer list.
typedef CXXCtorInitializer **init_iterator;
/// \brief Iterates through the member/base initializer list.
typedef CXXCtorInitializer *const *init_const_iterator;
typedef llvm::iterator_range<init_iterator> init_range;
typedef llvm::iterator_range<init_const_iterator> init_const_range;
init_range inits() { return init_range(init_begin(), init_end()); }
init_const_range inits() const {
return init_const_range(init_begin(), init_end());
}
/// \brief Retrieve an iterator to the first initializer.
init_iterator init_begin() {
const auto *ConstThis = this;
return const_cast<init_iterator>(ConstThis->init_begin());
}
/// \brief Retrieve an iterator to the first initializer.
init_const_iterator init_begin() const;
/// \brief Retrieve an iterator past the last initializer.
init_iterator init_end() {
return init_begin() + NumCtorInitializers;
}
/// \brief Retrieve an iterator past the last initializer.
init_const_iterator init_end() const {
return init_begin() + NumCtorInitializers;
}
typedef std::reverse_iterator<init_iterator> init_reverse_iterator;
typedef std::reverse_iterator<init_const_iterator>
init_const_reverse_iterator;
init_reverse_iterator init_rbegin() {
return init_reverse_iterator(init_end());
}
init_const_reverse_iterator init_rbegin() const {
return init_const_reverse_iterator(init_end());
}
init_reverse_iterator init_rend() {
return init_reverse_iterator(init_begin());
}
init_const_reverse_iterator init_rend() const {
return init_const_reverse_iterator(init_begin());
}
/// \brief Determine the number of arguments used to initialize the member
/// or base.
unsigned getNumCtorInitializers() const {
return NumCtorInitializers;
}
void setNumCtorInitializers(unsigned numCtorInitializers) {
NumCtorInitializers = numCtorInitializers;
}
void setCtorInitializers(CXXCtorInitializer **Initializers) {
CtorInitializers = Initializers;
}
/// Whether this function was declared with the 'explicit' specifier.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Whether this function is explicit.
bool isExplicit() const {
return getCanonicalDecl()->isExplicitSpecified();
}
/// \brief Determine whether this constructor is a delegating constructor.
bool isDelegatingConstructor() const {
return (getNumCtorInitializers() == 1) &&
init_begin()[0]->isDelegatingInitializer();
}
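// Illustrative example (hypothetical type): a delegating constructor has a
// single initializer naming the class type itself:
//
//   struct D {
//     D(int n);
//     D() : D(0) {} // delegates to D(int)
//   };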
/// \brief When this constructor delegates to another, retrieve the target.
CXXConstructorDecl *getTargetConstructor() const;
/// Whether this constructor is a default
/// constructor (C++ [class.ctor]p5), which can be used to
/// default-initialize a class of this type.
bool isDefaultConstructor() const;
/// \brief Whether this constructor is a copy constructor (C++ [class.copy]p2),
/// which can be used to copy the class.
///
/// \p TypeQuals will be set to the qualifiers on the
/// argument type. For example, \p TypeQuals would be set to \c
/// Qualifiers::Const for the following copy constructor:
///
/// \code
/// class X {
/// public:
/// X(const X&);
/// };
/// \endcode
bool isCopyConstructor(unsigned &TypeQuals) const;
/// Whether this constructor is a copy
/// constructor (C++ [class.copy]p2), which can be used to copy the
/// class.
bool isCopyConstructor() const {
unsigned TypeQuals = 0;
return isCopyConstructor(TypeQuals);
}
/// \brief Determine whether this constructor is a move constructor
/// (C++11 [class.copy]p3), which can be used to move values of the class.
///
/// \param TypeQuals If this constructor is a move constructor, will be set
/// to the type qualifiers on the referent of the first parameter's type.
bool isMoveConstructor(unsigned &TypeQuals) const;
/// \brief Determine whether this constructor is a move constructor
/// (C++11 [class.copy]p3), which can be used to move values of the class.
bool isMoveConstructor() const {
unsigned TypeQuals = 0;
return isMoveConstructor(TypeQuals);
}
/// \brief Determine whether this is a copy or move constructor.
///
/// \param TypeQuals Will be set to the type qualifiers on the reference
/// parameter, if in fact this is a copy or move constructor.
bool isCopyOrMoveConstructor(unsigned &TypeQuals) const;
/// \brief Determine whether this is a copy or move constructor.
bool isCopyOrMoveConstructor() const {
unsigned Quals;
return isCopyOrMoveConstructor(Quals);
}
/// Whether this constructor is a
/// converting constructor (C++ [class.conv.ctor]), which can be
/// used for user-defined conversions.
bool isConvertingConstructor(bool AllowExplicit) const;
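// Illustrative example (hypothetical type): a non-explicit constructor
// callable with one argument is a converting constructor:
//
//   struct S { S(int); };
//   S s = 42; // implicit conversion via S(int)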
/// \brief Determine whether this is a member template specialization that
/// would copy the object to itself. Such constructors are never used to copy
/// an object.
bool isSpecializationCopyingObject() const;
/// \brief Determine whether this is an implicit constructor synthesized to
/// model a call to a constructor inherited from a base class.
bool isInheritingConstructor() const { return IsInheritingConstructor; }
/// \brief Get the constructor that this inheriting constructor is based on.
InheritedConstructor getInheritedConstructor() const {
return IsInheritingConstructor ? *getTrailingObjects<InheritedConstructor>()
: InheritedConstructor();
}
CXXConstructorDecl *getCanonicalDecl() override {
return cast<CXXConstructorDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXConstructorDecl *getCanonicalDecl() const {
return const_cast<CXXConstructorDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXConstructor; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend TrailingObjects;
};
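// For example, each of the predicates above recognizes one of the
// constructors of the following class (a minimal sketch; the names are
// illustrative):
//
//   struct X {
//     X();                  // isDefaultConstructor()
//     X(const X &);         // isCopyConstructor()
//     X(X &&);              // isMoveConstructor()
//     X(int);               // isConvertingConstructor(/*AllowExplicit=*/false)
//     X(double d) : X() {}  // isDelegatingConstructor(), delegating to X()
//   };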
/// \brief Represents a C++ destructor within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// ~X(); // represented by a CXXDestructorDecl.
/// };
/// \endcode
class CXXDestructorDecl : public CXXMethodDecl {
void anchor() override;
FunctionDecl *OperatorDelete;
CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isImplicitlyDeclared)
: CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, /*isConstexpr=*/false, SourceLocation()),
OperatorDelete(nullptr) {
setImplicit(isImplicitlyDeclared);
}
public:
static CXXDestructorDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo* TInfo,
bool isInline,
bool isImplicitlyDeclared);
static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID);
void setOperatorDelete(FunctionDecl *OD);
const FunctionDecl *getOperatorDelete() const {
return getCanonicalDecl()->OperatorDelete;
}
CXXDestructorDecl *getCanonicalDecl() override {
return cast<CXXDestructorDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXDestructorDecl *getCanonicalDecl() const {
return const_cast<CXXDestructorDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXDestructor; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
/// \brief Represents a C++ conversion function within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// operator bool();
/// };
/// \endcode
class CXXConversionDecl : public CXXMethodDecl {
void anchor() override;
CXXConversionDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
TypeSourceInfo *TInfo, bool isInline,
bool isExplicitSpecified, bool isConstexpr,
SourceLocation EndLocation)
: CXXMethodDecl(CXXConversion, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, isConstexpr, EndLocation) {
IsExplicitSpecified = isExplicitSpecified;
}
public:
static CXXConversionDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isExplicit,
bool isConstexpr,
SourceLocation EndLocation);
static CXXConversionDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Whether this conversion function was explicitly marked \c explicit in the
/// source.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Whether this function is explicit.
bool isExplicit() const {
return getCanonicalDecl()->isExplicitSpecified();
}
/// \brief Returns the type that this conversion function is converting to.
QualType getConversionType() const {
return getType()->getAs<FunctionType>()->getReturnType();
}
/// \brief Determine whether this conversion function is a conversion from
/// a lambda closure type to a block pointer.
bool isLambdaToBlockPointerConversion() const;
CXXConversionDecl *getCanonicalDecl() override {
return cast<CXXConversionDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXConversionDecl *getCanonicalDecl() const {
return const_cast<CXXConversionDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXConversion; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
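// For example, in the following class (an illustrative sketch), the first
// conversion function has isExplicitSpecified() == true and the second
// does not:
//
//   struct Y {
//     explicit operator bool() const;  // explicit conversion function
//     operator int() const;            // implicit conversion function
//   };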
/// \brief Represents a linkage specification.
///
/// For example:
/// \code
/// extern "C" void foo();
/// \endcode
class LinkageSpecDecl : public Decl, public DeclContext {
virtual void anchor();
public:
/// \brief Represents the language in a linkage specification.
///
/// The values are part of the serialization ABI for
/// ASTs and cannot be changed without altering that ABI. To help
/// ensure a stable ABI for this, we choose the DW_LANG_ encodings
/// from the DWARF standard.
enum LanguageIDs {
lang_c = /* DW_LANG_C */ 0x0002,
lang_cxx = /* DW_LANG_C_plus_plus */ 0x0004
};
private:
/// \brief The language for this linkage specification.
unsigned Language : 3;
/// \brief True if this linkage spec has braces.
///
/// This is needed so that hasBraces() returns the correct result while the
/// linkage spec body is being parsed. Once RBraceLoc has been set this is
/// not used, so it doesn't need to be serialized.
unsigned HasBraces : 1;
/// \brief The source location for the extern keyword.
SourceLocation ExternLoc;
/// \brief The source location for the right brace (if valid).
SourceLocation RBraceLoc;
LinkageSpecDecl(DeclContext *DC, SourceLocation ExternLoc,
SourceLocation LangLoc, LanguageIDs lang, bool HasBraces)
: Decl(LinkageSpec, DC, LangLoc), DeclContext(LinkageSpec),
Language(lang), HasBraces(HasBraces), ExternLoc(ExternLoc),
RBraceLoc(SourceLocation()) { }
public:
static LinkageSpecDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation ExternLoc,
SourceLocation LangLoc, LanguageIDs Lang,
bool HasBraces);
static LinkageSpecDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// \brief Return the language specified by this linkage specification.
LanguageIDs getLanguage() const { return LanguageIDs(Language); }
/// \brief Set the language specified by this linkage specification.
void setLanguage(LanguageIDs L) { Language = L; }
/// \brief Determines whether this linkage specification had braces in
/// its syntactic form.
bool hasBraces() const {
assert(!RBraceLoc.isValid() || HasBraces);
return HasBraces;
}
SourceLocation getExternLoc() const { return ExternLoc; }
SourceLocation getRBraceLoc() const { return RBraceLoc; }
void setExternLoc(SourceLocation L) { ExternLoc = L; }
void setRBraceLoc(SourceLocation L) {
RBraceLoc = L;
HasBraces = RBraceLoc.isValid();
}
SourceLocation getLocEnd() const LLVM_READONLY {
if (hasBraces())
return getRBraceLoc();
// No braces: get the end location of the (only) declaration in context
// (if present).
return decls_empty() ? getLocation() : decls_begin()->getLocEnd();
}
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(ExternLoc, getLocEnd());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == LinkageSpec; }
static DeclContext *castToDeclContext(const LinkageSpecDecl *D) {
return static_cast<DeclContext *>(const_cast<LinkageSpecDecl*>(D));
}
static LinkageSpecDecl *castFromDeclContext(const DeclContext *DC) {
return static_cast<LinkageSpecDecl *>(const_cast<DeclContext*>(DC));
}
};
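// For example (a sketch of the two syntactic forms), hasBraces() is true
// for the first linkage specification below and false for the second:
//
//   extern "C" {          // braced form; getRBraceLoc() is valid
//     void f();
//     void g();
//   }
//   extern "C" void h();  // unbraced form; contains exactly one declaration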
/// \brief Represents C++ using-directive.
///
/// For example:
/// \code
/// using namespace std;
/// \endcode
///
/// \note UsingDirectiveDecl should be Decl not NamedDecl, but we provide
/// artificial names for all using-directives in order to store
/// them in DeclContext effectively.
class UsingDirectiveDecl : public NamedDecl {
void anchor() override;
/// \brief The location of the \c using keyword.
SourceLocation UsingLoc;
/// \brief The location of the \c namespace keyword.
SourceLocation NamespaceLoc;
/// \brief The nested-name-specifier that precedes the namespace.
NestedNameSpecifierLoc QualifierLoc;
/// \brief The namespace nominated by this using-directive.
NamedDecl *NominatedNamespace;
/// Enclosing context containing both using-directive and nominated
/// namespace.
DeclContext *CommonAncestor;
/// \brief Returns special DeclarationName used by using-directives.
///
/// This is only used by DeclContext for storing UsingDirectiveDecls in
/// its lookup structure.
static DeclarationName getName() {
return DeclarationName::getUsingDirectiveName();
}
UsingDirectiveDecl(DeclContext *DC, SourceLocation UsingLoc,
SourceLocation NamespcLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Nominated,
DeclContext *CommonAncestor)
: NamedDecl(UsingDirective, DC, IdentLoc, getName()), UsingLoc(UsingLoc),
NamespaceLoc(NamespcLoc), QualifierLoc(QualifierLoc),
NominatedNamespace(Nominated), CommonAncestor(CommonAncestor) { }
public:
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace, with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
NamedDecl *getNominatedNamespaceAsWritten() { return NominatedNamespace; }
const NamedDecl *getNominatedNamespaceAsWritten() const {
return NominatedNamespace;
}
/// \brief Returns the namespace nominated by this using-directive.
NamespaceDecl *getNominatedNamespace();
const NamespaceDecl *getNominatedNamespace() const {
return const_cast<UsingDirectiveDecl*>(this)->getNominatedNamespace();
}
/// \brief Returns the common ancestor context of this using-directive and
/// its nominated namespace.
DeclContext *getCommonAncestor() { return CommonAncestor; }
const DeclContext *getCommonAncestor() const { return CommonAncestor; }
/// \brief Return the location of the \c using keyword.
SourceLocation getUsingLoc() const { return UsingLoc; }
// FIXME: Could omit 'Key' in name.
/// \brief Returns the location of the \c namespace keyword.
SourceLocation getNamespaceKeyLocation() const { return NamespaceLoc; }
/// \brief Returns the location of this using declaration's identifier.
SourceLocation getIdentLocation() const { return getLocation(); }
static UsingDirectiveDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation NamespaceLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Nominated,
DeclContext *CommonAncestor);
static UsingDirectiveDecl *CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(UsingLoc, getLocation());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UsingDirective; }
// Friend for getUsingDirectiveName.
friend class DeclContext;
friend class ASTDeclReader;
};
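// For example, given the following (illustrative) code, the using-directive
// nominates A::B, carries A:: as its qualifier, and has the translation
// unit as the common ancestor of the directive and the nominated namespace:
//
//   namespace A { namespace B { void f(); } }
//   void g() {
//     using namespace A::B;  // getQualifier() yields A::,
//     f();                   // getNominatedNamespace() yields B
//   }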
/// \brief Represents a C++ namespace alias.
///
/// For example:
///
/// \code
/// namespace Foo = Bar;
/// \endcode
class NamespaceAliasDecl : public NamedDecl,
public Redeclarable<NamespaceAliasDecl> {
void anchor() override;
/// \brief The location of the \c namespace keyword.
SourceLocation NamespaceLoc;
/// \brief The location of the namespace's identifier.
///
/// This is accessed by TargetNameLoc.
SourceLocation IdentLoc;
/// \brief The nested-name-specifier that precedes the namespace.
NestedNameSpecifierLoc QualifierLoc;
/// \brief The Decl that this alias points to, either a NamespaceDecl or
/// a NamespaceAliasDecl.
NamedDecl *Namespace;
NamespaceAliasDecl(ASTContext &C, DeclContext *DC,
SourceLocation NamespaceLoc, SourceLocation AliasLoc,
IdentifierInfo *Alias, NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc, NamedDecl *Namespace)
: NamedDecl(NamespaceAlias, DC, AliasLoc, Alias), redeclarable_base(C),
NamespaceLoc(NamespaceLoc), IdentLoc(IdentLoc),
QualifierLoc(QualifierLoc), Namespace(Namespace) {}
typedef Redeclarable<NamespaceAliasDecl> redeclarable_base;
NamespaceAliasDecl *getNextRedeclarationImpl() override;
NamespaceAliasDecl *getPreviousDeclImpl() override;
NamespaceAliasDecl *getMostRecentDeclImpl() override;
friend class ASTDeclReader;
public:
static NamespaceAliasDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation NamespaceLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Namespace);
static NamespaceAliasDecl *CreateDeserialized(ASTContext &C, unsigned ID);
typedef redeclarable_base::redecl_range redecl_range;
typedef redeclarable_base::redecl_iterator redecl_iterator;
using redeclarable_base::redecls_begin;
using redeclarable_base::redecls_end;
using redeclarable_base::redecls;
using redeclarable_base::getPreviousDecl;
using redeclarable_base::getMostRecentDecl;
NamespaceAliasDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const NamespaceAliasDecl *getCanonicalDecl() const {
return getFirstDecl();
}
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace, with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
/// \brief Retrieve the namespace declaration aliased by this directive.
NamespaceDecl *getNamespace() {
if (NamespaceAliasDecl *AD = dyn_cast<NamespaceAliasDecl>(Namespace))
return AD->getNamespace();
return cast<NamespaceDecl>(Namespace);
}
const NamespaceDecl *getNamespace() const {
return const_cast<NamespaceAliasDecl*>(this)->getNamespace();
}
/// Returns the location of the alias name, i.e. 'foo' in
/// "namespace foo = ns::bar;".
SourceLocation getAliasLoc() const { return getLocation(); }
/// Returns the location of the \c namespace keyword.
SourceLocation getNamespaceLoc() const { return NamespaceLoc; }
/// Returns the location of the identifier in the named namespace.
SourceLocation getTargetNameLoc() const { return IdentLoc; }
/// \brief Retrieve the namespace that this alias refers to, which
/// may either be a NamespaceDecl or a NamespaceAliasDecl.
NamedDecl *getAliasedNamespace() const { return Namespace; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(NamespaceLoc, IdentLoc);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == NamespaceAlias; }
};
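// For example (an illustrative chain of aliases), getNamespace() on the
// alias C below unwraps through B and returns the NamespaceDecl for A,
// while getAliasedNamespace() returns the alias B itself:
//
//   namespace A { }
//   namespace B = A;  // aliases a NamespaceDecl
//   namespace C = B;  // aliases a NamespaceAliasDecl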
/// \brief Represents a shadow declaration introduced into a scope by a
/// (resolved) using declaration.
///
/// For example,
/// \code
/// namespace A {
/// void foo();
/// }
/// namespace B {
/// using A::foo; // <- a UsingDecl
/// // Also creates a UsingShadowDecl for A::foo() in B
/// }
/// \endcode
class UsingShadowDecl : public NamedDecl, public Redeclarable<UsingShadowDecl> {
void anchor() override;
/// The referenced declaration.
NamedDecl *Underlying;
/// \brief The using declaration which introduced this decl or the next using
/// shadow declaration contained in the aforementioned using declaration.
NamedDecl *UsingOrNextShadow;
friend class UsingDecl;
typedef Redeclarable<UsingShadowDecl> redeclarable_base;
UsingShadowDecl *getNextRedeclarationImpl() override {
return getNextRedeclaration();
}
UsingShadowDecl *getPreviousDeclImpl() override {
return getPreviousDecl();
}
UsingShadowDecl *getMostRecentDeclImpl() override {
return getMostRecentDecl();
}
protected:
UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC, SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target);
UsingShadowDecl(Kind K, ASTContext &C, EmptyShell);
public:
static UsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target) {
return new (C, DC) UsingShadowDecl(UsingShadow, C, DC, Loc, Using, Target);
}
static UsingShadowDecl *CreateDeserialized(ASTContext &C, unsigned ID);
typedef redeclarable_base::redecl_range redecl_range;
typedef redeclarable_base::redecl_iterator redecl_iterator;
using redeclarable_base::redecls_begin;
using redeclarable_base::redecls_end;
using redeclarable_base::redecls;
using redeclarable_base::getPreviousDecl;
using redeclarable_base::getMostRecentDecl;
using redeclarable_base::isFirstDecl;
UsingShadowDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UsingShadowDecl *getCanonicalDecl() const {
return getFirstDecl();
}
/// \brief Gets the underlying declaration which has been brought into the
/// local scope.
NamedDecl *getTargetDecl() const { return Underlying; }
/// \brief Sets the underlying declaration which has been brought into the
/// local scope.
void setTargetDecl(NamedDecl* ND) {
assert(ND && "Target decl is null!");
Underlying = ND;
IdentifierNamespace = ND->getIdentifierNamespace();
}
/// \brief Gets the using declaration to which this declaration is tied.
UsingDecl *getUsingDecl() const;
/// \brief The next using shadow declaration contained in the shadow decl
/// chain of the using declaration which introduced this decl.
UsingShadowDecl *getNextUsingShadowDecl() const {
return dyn_cast_or_null<UsingShadowDecl>(UsingOrNextShadow);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K == Decl::UsingShadow || K == Decl::ConstructorUsingShadow;
}
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
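// For example, when the name brought in by a using declaration is
// overloaded, each overload gets its own shadow declaration, chained
// together through getNextUsingShadowDecl() (a sketch):
//
//   namespace A {
//     void f(int);
//     void f(double);
//   }
//   namespace B {
//     using A::f;  // one UsingDecl, two UsingShadowDecls in B
//   }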
/// \brief Represents a shadow constructor declaration introduced into a
/// class by a C++11 using-declaration that names a constructor.
///
/// For example:
/// \code
/// struct Base { Base(int); };
/// struct Derived {
/// using Base::Base; // creates a UsingDecl and a ConstructorUsingShadowDecl
/// };
/// \endcode
class ConstructorUsingShadowDecl final : public UsingShadowDecl {
void anchor() override;
/// \brief If this constructor using declaration inherited the constructor
/// from an indirect base class, this is the ConstructorUsingShadowDecl
/// in the named direct base class from which the declaration was inherited.
ConstructorUsingShadowDecl *NominatedBaseClassShadowDecl;
/// \brief If this constructor using declaration inherited the constructor
/// from an indirect base class, this is the ConstructorUsingShadowDecl
/// that will be used to construct the unique direct or virtual base class
/// that receives the constructor arguments.
ConstructorUsingShadowDecl *ConstructedBaseClassShadowDecl;
/// \brief \c true if the constructor ultimately named by this using shadow
/// declaration is within a virtual base class subobject of the class that
/// contains this declaration.
unsigned IsVirtual : 1;
ConstructorUsingShadowDecl(ASTContext &C, DeclContext *DC, SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target,
bool TargetInVirtualBase)
: UsingShadowDecl(ConstructorUsingShadow, C, DC, Loc, Using,
Target->getUnderlyingDecl()),
NominatedBaseClassShadowDecl(
dyn_cast<ConstructorUsingShadowDecl>(Target)),
ConstructedBaseClassShadowDecl(NominatedBaseClassShadowDecl),
IsVirtual(TargetInVirtualBase) {
// If we found a constructor that chains to a constructor for a virtual
// base, we should directly call that virtual base constructor instead.
// FIXME: This logic belongs in Sema.
if (NominatedBaseClassShadowDecl &&
NominatedBaseClassShadowDecl->constructsVirtualBase()) {
ConstructedBaseClassShadowDecl =
NominatedBaseClassShadowDecl->ConstructedBaseClassShadowDecl;
IsVirtual = true;
}
}
ConstructorUsingShadowDecl(ASTContext &C, EmptyShell Empty)
: UsingShadowDecl(ConstructorUsingShadow, C, Empty),
NominatedBaseClassShadowDecl(), ConstructedBaseClassShadowDecl(),
IsVirtual(false) {}
public:
static ConstructorUsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target,
bool IsVirtual);
static ConstructorUsingShadowDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
/// Returns the parent of this using shadow declaration, which
/// is the class in which this is declared.
//@{
const CXXRecordDecl *getParent() const {
return cast<CXXRecordDecl>(getDeclContext());
}
CXXRecordDecl *getParent() {
return cast<CXXRecordDecl>(getDeclContext());
}
//@}
/// \brief Get the inheriting constructor declaration for the direct base
/// class from which this using shadow declaration was inherited, if there is
/// one. This can be different for each redeclaration of the same shadow decl.
ConstructorUsingShadowDecl *getNominatedBaseClassShadowDecl() const {
return NominatedBaseClassShadowDecl;
}
/// \brief Get the inheriting constructor declaration for the base class
/// for which we don't have an explicit initializer, if there is one.
ConstructorUsingShadowDecl *getConstructedBaseClassShadowDecl() const {
return ConstructedBaseClassShadowDecl;
}
/// \brief Get the base class that was named in the using declaration. This
/// can be different for each redeclaration of this same shadow decl.
CXXRecordDecl *getNominatedBaseClass() const;
/// \brief Get the base class whose constructor or constructor shadow
/// declaration is passed the constructor arguments.
CXXRecordDecl *getConstructedBaseClass() const {
return cast<CXXRecordDecl>((ConstructedBaseClassShadowDecl
? ConstructedBaseClassShadowDecl
: getTargetDecl())
->getDeclContext());
}
/// \brief Returns \c true if the constructed base class is a virtual base
/// class subobject of this declaration's class.
bool constructsVirtualBase() const {
return IsVirtual;
}
/// \brief Get the constructor or constructor template in the derived class
/// corresponding to this using shadow declaration, if it has been implicitly
/// declared already.
CXXConstructorDecl *getConstructor() const;
void setConstructor(NamedDecl *Ctor);
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == ConstructorUsingShadow; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
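// For example (an illustrative sketch), the using shadow declaration in C
// below nominates the ConstructorUsingShadowDecl in its direct base B,
// which itself inherits the constructor from A:
//
//   struct A { A(int); };
//   struct B : A { using A::A; };
//   struct C : B { using B::B; };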
/// \brief Represents a C++ using-declaration.
///
/// For example:
/// \code
/// using someNameSpace::someIdentifier;
/// \endcode
class UsingDecl : public NamedDecl, public Mergeable<UsingDecl> {
void anchor() override;
/// \brief The source location of the 'using' keyword itself.
SourceLocation UsingLocation;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
/// \brief Provides source/type location info for the declaration name
/// embedded in the ValueDecl base class.
DeclarationNameLoc DNLoc;
/// \brief The first shadow declaration of the shadow decl chain associated
/// with this using declaration.
///
/// The bool member of the pair stores whether this decl has the \c typename
/// keyword.
llvm::PointerIntPair<UsingShadowDecl *, 1, bool> FirstUsingShadow;
UsingDecl(DeclContext *DC, SourceLocation UL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, bool HasTypenameKeyword)
: NamedDecl(Using, DC, NameInfo.getLoc(), NameInfo.getName()),
UsingLocation(UL), QualifierLoc(QualifierLoc),
DNLoc(NameInfo.getInfo()), FirstUsingShadow(nullptr, HasTypenameKeyword) {
}
public:
/// \brief Return the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return UsingLocation; }
/// \brief Set the source location of the 'using' keyword.
void setUsingLoc(SourceLocation L) { UsingLocation = L; }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation(), DNLoc);
}
/// \brief Return true if it is a C++03 access declaration (no 'using').
bool isAccessDeclaration() const { return UsingLocation.isInvalid(); }
/// \brief Return true if the using declaration has 'typename'.
bool hasTypename() const { return FirstUsingShadow.getInt(); }
/// \brief Sets whether the using declaration has 'typename'.
void setTypename(bool TN) { FirstUsingShadow.setInt(TN); }
/// \brief Iterates through the using shadow declarations associated with
/// this using declaration.
class shadow_iterator {
/// \brief The current using shadow declaration.
UsingShadowDecl *Current;
public:
typedef UsingShadowDecl* value_type;
typedef UsingShadowDecl* reference;
typedef UsingShadowDecl* pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
shadow_iterator() : Current(nullptr) { }
explicit shadow_iterator(UsingShadowDecl *C) : Current(C) { }
reference operator*() const { return Current; }
pointer operator->() const { return Current; }
shadow_iterator& operator++() {
Current = Current->getNextUsingShadowDecl();
return *this;
}
shadow_iterator operator++(int) {
shadow_iterator tmp(*this);
++(*this);
return tmp;
}
friend bool operator==(shadow_iterator x, shadow_iterator y) {
return x.Current == y.Current;
}
friend bool operator!=(shadow_iterator x, shadow_iterator y) {
return x.Current != y.Current;
}
};
typedef llvm::iterator_range<shadow_iterator> shadow_range;
shadow_range shadows() const {
return shadow_range(shadow_begin(), shadow_end());
}
shadow_iterator shadow_begin() const {
return shadow_iterator(FirstUsingShadow.getPointer());
}
shadow_iterator shadow_end() const { return shadow_iterator(); }
/// \brief Return the number of shadowed declarations associated with this
/// using declaration.
unsigned shadow_size() const {
return std::distance(shadow_begin(), shadow_end());
}
void addShadowDecl(UsingShadowDecl *S);
void removeShadowDecl(UsingShadowDecl *S);
static UsingDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
bool HasTypenameKeyword);
static UsingDecl *CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY;
/// Retrieves the canonical declaration of this declaration.
UsingDecl *getCanonicalDecl() override { return getFirstDecl(); }
const UsingDecl *getCanonicalDecl() const { return getFirstDecl(); }
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Using; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
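// A minimal sketch of walking the shadow declarations introduced by a
// using declaration, assuming a valid UsingDecl *UD:
//
//   for (UsingShadowDecl *Shadow : UD->shadows()) {
//     NamedDecl *Target = Shadow->getTargetDecl();
//     // ... inspect the underlying declaration ...
//   }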
/// Represents a pack of using declarations that a single
/// using-declarator pack-expanded into.
///
/// \code
/// template<typename ...T> struct X : T... {
/// using T::operator()...;
/// using T::operator T...;
/// };
/// \endcode
///
/// In the second case above, the UsingPackDecl will have the name
/// 'operator T' (which contains an unexpanded pack), but the individual
/// UsingDecls and UsingShadowDecls will have more reasonable names.
class UsingPackDecl final
: public NamedDecl, public Mergeable<UsingPackDecl>,
private llvm::TrailingObjects<UsingPackDecl, NamedDecl *> {
void anchor() override;
/// The UnresolvedUsingValueDecl or UnresolvedUsingTypenameDecl from
/// which this was instantiated.
NamedDecl *InstantiatedFrom;
/// The number of using-declarations created by this pack expansion.
unsigned NumExpansions;
UsingPackDecl(DeclContext *DC, NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls)
: NamedDecl(UsingPack, DC,
InstantiatedFrom ? InstantiatedFrom->getLocation()
: SourceLocation(),
InstantiatedFrom ? InstantiatedFrom->getDeclName()
: DeclarationName()),
InstantiatedFrom(InstantiatedFrom), NumExpansions(UsingDecls.size()) {
std::uninitialized_copy(UsingDecls.begin(), UsingDecls.end(),
getTrailingObjects<NamedDecl *>());
}
public:
/// Get the using declaration from which this was instantiated. This will
/// always be an UnresolvedUsingValueDecl or an UnresolvedUsingTypenameDecl
/// that is a pack expansion.
NamedDecl *getInstantiatedFromUsingDecl() const { return InstantiatedFrom; }
/// Get the set of using declarations that this pack expanded into. Note that
/// some of these may still be unresolved.
ArrayRef<NamedDecl *> expansions() const {
return llvm::makeArrayRef(getTrailingObjects<NamedDecl *>(), NumExpansions);
}
static UsingPackDecl *Create(ASTContext &C, DeclContext *DC,
NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls);
static UsingPackDecl *CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions);
SourceRange getSourceRange() const override LLVM_READONLY {
return InstantiatedFrom->getSourceRange();
}
UsingPackDecl *getCanonicalDecl() override { return getFirstDecl(); }
const UsingPackDecl *getCanonicalDecl() const { return getFirstDecl(); }
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UsingPack; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend TrailingObjects;
};
/// \brief Represents a dependent using declaration which was not marked with
/// \c typename.
///
/// Unlike non-dependent using declarations, these *only* bring through
/// non-types; otherwise they would break two-phase lookup.
///
/// \code
/// template \<class T> class A : public Base<T> {
/// using Base<T>::foo;
/// };
/// \endcode
class UnresolvedUsingValueDecl : public ValueDecl,
public Mergeable<UnresolvedUsingValueDecl> {
void anchor() override;
/// \brief The source location of the 'using' keyword
SourceLocation UsingLocation;
/// \brief If this is a pack expansion, the location of the '...'.
SourceLocation EllipsisLoc;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
/// \brief Provides source/type location info for the declaration name
/// embedded in the ValueDecl base class.
DeclarationNameLoc DNLoc;
UnresolvedUsingValueDecl(DeclContext *DC, QualType Ty,
SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
SourceLocation EllipsisLoc)
: ValueDecl(UnresolvedUsingValue, DC,
NameInfo.getLoc(), NameInfo.getName(), Ty),
UsingLocation(UsingLoc), EllipsisLoc(EllipsisLoc),
QualifierLoc(QualifierLoc), DNLoc(NameInfo.getInfo())
{ }
public:
/// \brief Returns the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return UsingLocation; }
/// \brief Set the source location of the 'using' keyword.
void setUsingLoc(SourceLocation L) { UsingLocation = L; }
/// \brief Return true if it is a C++03 access declaration (no 'using').
bool isAccessDeclaration() const { return UsingLocation.isInvalid(); }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation(), DNLoc);
}
/// \brief Determine whether this is a pack expansion.
bool isPackExpansion() const {
return EllipsisLoc.isValid();
}
/// \brief Get the location of the ellipsis if this is a pack expansion.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
static UnresolvedUsingValueDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, SourceLocation EllipsisLoc);
static UnresolvedUsingValueDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY;
/// Retrieves the canonical declaration of this declaration.
UnresolvedUsingValueDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UnresolvedUsingValueDecl *getCanonicalDecl() const {
return getFirstDecl();
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UnresolvedUsingValue; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
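// For example (illustrative), the pack-expanded using declaration below
// produces declarations for which isPackExpansion() is true and
// getEllipsisLoc() points at the '...':
//
//   template <class... Bases> struct X : Bases... {
//     using Bases::operator()...;
//   };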
/// \brief Represents a dependent using declaration which was marked with
/// \c typename.
///
/// \code
/// template \<class T> class A : public Base<T> {
/// using typename Base<T>::foo;
/// };
/// \endcode
///
/// The type associated with an unresolved using typename decl is
/// currently always a typename type.
class UnresolvedUsingTypenameDecl
: public TypeDecl,
public Mergeable<UnresolvedUsingTypenameDecl> {
void anchor() override;
/// \brief The source location of the 'typename' keyword
SourceLocation TypenameLocation;
/// \brief If this is a pack expansion, the location of the '...'.
SourceLocation EllipsisLoc;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
UnresolvedUsingTypenameDecl(DeclContext *DC, SourceLocation UsingLoc,
SourceLocation TypenameLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc,
IdentifierInfo *TargetName,
SourceLocation EllipsisLoc)
: TypeDecl(UnresolvedUsingTypename, DC, TargetNameLoc, TargetName,
UsingLoc),
TypenameLocation(TypenameLoc), EllipsisLoc(EllipsisLoc),
QualifierLoc(QualifierLoc) { }
friend class ASTDeclReader;
public:
/// \brief Returns the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return getLocStart(); }
/// \brief Returns the source location of the 'typename' keyword.
SourceLocation getTypenameLoc() const { return TypenameLocation; }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation());
}
/// \brief Determine whether this is a pack expansion.
bool isPackExpansion() const {
return EllipsisLoc.isValid();
}
/// \brief Get the location of the ellipsis if this is a pack expansion.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
static UnresolvedUsingTypenameDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation UsingLoc,
SourceLocation TypenameLoc, NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc, DeclarationName TargetName,
SourceLocation EllipsisLoc);
static UnresolvedUsingTypenameDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
/// Retrieves the canonical declaration of this declaration.
UnresolvedUsingTypenameDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UnresolvedUsingTypenameDecl *getCanonicalDecl() const {
return getFirstDecl();
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UnresolvedUsingTypename; }
};
/// \brief Represents a C++11 static_assert declaration.
class StaticAssertDecl : public Decl {
virtual void anchor();
llvm::PointerIntPair<Expr *, 1, bool> AssertExprAndFailed;
StringLiteral *Message;
SourceLocation RParenLoc;
StaticAssertDecl(DeclContext *DC, SourceLocation StaticAssertLoc,
Expr *AssertExpr, StringLiteral *Message,
SourceLocation RParenLoc, bool Failed)
: Decl(StaticAssert, DC, StaticAssertLoc),
AssertExprAndFailed(AssertExpr, Failed), Message(Message),
RParenLoc(RParenLoc) { }
public:
static StaticAssertDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StaticAssertLoc,
Expr *AssertExpr, StringLiteral *Message,
SourceLocation RParenLoc, bool Failed);
static StaticAssertDecl *CreateDeserialized(ASTContext &C, unsigned ID);
Expr *getAssertExpr() { return AssertExprAndFailed.getPointer(); }
const Expr *getAssertExpr() const { return AssertExprAndFailed.getPointer(); }
StringLiteral *getMessage() { return Message; }
const StringLiteral *getMessage() const { return Message; }
bool isFailed() const { return AssertExprAndFailed.getInt(); }
SourceLocation getRParenLoc() const { return RParenLoc; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getLocation(), getRParenLoc());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == StaticAssert; }
friend class ASTDeclReader;
};
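// For example (a minimal sketch), the declaration below yields a
// StaticAssertDecl whose assert expression is 'sizeof(int) == 4', whose
// message is the string literal, and whose isFailed() result reflects
// whether the condition evaluated to false:
//
//   static_assert(sizeof(int) == 4, "int must be 4 bytes");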
/// A binding in a decomposition declaration. For instance, given:
///
/// \code
/// int n[3];
/// auto &[a, b, c] = n;
/// \endcode
///
/// a, b, and c are BindingDecls, whose bindings are the expressions
/// x[0], x[1], and x[2] respectively, where x is the implicit
/// DecompositionDecl of type 'int (&)[3]'.
class BindingDecl : public ValueDecl {
void anchor() override;
/// The binding represented by this declaration. References to this
/// declaration are effectively equivalent to this expression (except
/// that it is only evaluated once at the point of declaration of the
/// binding).
Expr *Binding;
BindingDecl(DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id)
: ValueDecl(Decl::Binding, DC, IdLoc, Id, QualType()), Binding(nullptr) {}
public:
static BindingDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation IdLoc, IdentifierInfo *Id);
static BindingDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Get the expression to which this declaration is bound. This may be null
/// in two different cases: while parsing the initializer for the
/// decomposition declaration, and when the initializer is type-dependent.
Expr *getBinding() const { return Binding; }
/// Get the variable (if any) that holds the value of evaluating the binding.
/// Only present for user-defined bindings for tuple-like types.
VarDecl *getHoldingVar() const;
/// Set the binding for this BindingDecl, along with its declared type (which
/// should be a possibly-cv-qualified form of the type of the binding, or a
/// reference to such a type).
void setBinding(QualType DeclaredType, Expr *Binding) {
setType(DeclaredType);
this->Binding = Binding;
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Decl::Binding; }
friend class ASTDeclReader;
};
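// For example (illustrative), when decomposing a tuple-like type, each
// binding gets a holding variable that stores the result of the get<I>()
// call, and getHoldingVar() returns it:
//
//   std::tuple<int, float> t;
//   auto [i, f] = t;  // i and f are BindingDecls with holding vars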
/// A decomposition declaration. For instance, given:
///
/// \code
/// int n[3];
/// auto &[a, b, c] = n;
/// \endcode
///
/// the second line declares a DecompositionDecl of type 'int (&)[3]', and
/// three BindingDecls (named a, b, and c). An instance of this class is always
/// unnamed, but behaves in almost all other respects like a VarDecl.
class DecompositionDecl final
: public VarDecl,
private llvm::TrailingObjects<DecompositionDecl, BindingDecl *> {
void anchor() override;
/// The number of BindingDecl*s following this object.
unsigned NumBindings;
DecompositionDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation LSquareLoc, QualType T,
TypeSourceInfo *TInfo, StorageClass SC,
ArrayRef<BindingDecl *> Bindings)
: VarDecl(Decomposition, C, DC, StartLoc, LSquareLoc, nullptr, T, TInfo,
SC),
NumBindings(Bindings.size()) {
std::uninitialized_copy(Bindings.begin(), Bindings.end(),
getTrailingObjects<BindingDecl *>());
}
public:
static DecompositionDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc,
SourceLocation LSquareLoc,
QualType T, TypeSourceInfo *TInfo,
StorageClass S,
ArrayRef<BindingDecl *> Bindings);
static DecompositionDecl *CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumBindings);
ArrayRef<BindingDecl *> bindings() const {
return llvm::makeArrayRef(getTrailingObjects<BindingDecl *>(), NumBindings);
}
void printName(raw_ostream &os) const override;
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Decomposition; }
friend TrailingObjects;
friend class ASTDeclReader;
};
/// An instance of this class represents the declaration of a property
/// member. This is a Microsoft extension to C++, first introduced in
/// Visual Studio .NET 2003 as a parallel to similar features in C#
/// and Managed C++.
///
/// A property must always be a non-static class member.
///
/// A property member superficially resembles a non-static data
/// member, except preceded by a property attribute:
///
/// \code
/// __declspec(property(get=GetX, put=PutX)) int x;
/// \endcode
///
/// Either (but not both) of the 'get' and 'put' names may be omitted.
///
/// A reference to a property is always an lvalue. If the lvalue
/// undergoes lvalue-to-rvalue conversion, then a getter name is
/// required, and that member is called with no arguments.
/// If the lvalue is assigned into, then a setter name is required,
/// and that member is called with one argument, the value assigned.
/// Both operations are potentially overloaded. Compound assignments
/// are permitted, as are the increment and decrement operators.
///
/// The getter and putter methods are permitted to be overloaded,
/// although their return and parameter types are subject to certain
/// restrictions according to the type of the property.
///
/// A property declared using an incomplete array type may
/// additionally be subscripted, adding extra parameters to the getter
/// and putter methods.
class MSPropertyDecl : public DeclaratorDecl {
IdentifierInfo *GetterId, *SetterId;
MSPropertyDecl(DeclContext *DC, SourceLocation L, DeclarationName N,
QualType T, TypeSourceInfo *TInfo, SourceLocation StartL,
IdentifierInfo *Getter, IdentifierInfo *Setter)
: DeclaratorDecl(MSProperty, DC, L, N, T, TInfo, StartL),
GetterId(Getter), SetterId(Setter) {}
public:
static MSPropertyDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName N, QualType T,
TypeSourceInfo *TInfo, SourceLocation StartL,
IdentifierInfo *Getter, IdentifierInfo *Setter);
static MSPropertyDecl *CreateDeserialized(ASTContext &C, unsigned ID);
static bool classof(const Decl *D) { return D->getKind() == MSProperty; }
bool hasGetter() const { return GetterId != nullptr; }
IdentifierInfo* getGetterId() const { return GetterId; }
bool hasSetter() const { return SetterId != nullptr; }
IdentifierInfo* getSetterId() const { return SetterId; }
friend class ASTDeclReader;
};
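// For example (a sketch under the -fms-extensions dialect), reads and
// writes of the property x below are rewritten into calls to the named
// getter and setter:
//
//   struct S {
//     int GetX();
//     void PutX(int);
//     __declspec(property(get = GetX, put = PutX)) int x;
//   };
//   // 'int v = s.x;' calls s.GetX(); 's.x = 3;' calls s.PutX(3).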
/// Insertion operator for diagnostics. This allows sending an AccessSpecifier
/// into a diagnostic with <<.
const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
AccessSpecifier AS);
const PartialDiagnostic &operator<<(const PartialDiagnostic &DB,
AccessSpecifier AS);
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h (revision 322854)
+++ head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h (revision 322855)
@@ -1,2080 +1,2080 @@
//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines the clang::Preprocessor interface.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
#define LLVM_CLANG_LEX_PREPROCESSOR_H
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/PTHLexer.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Registry.h"
#include <memory>
#include <vector>
namespace llvm {
template<unsigned InternalLen> class SmallString;
}
namespace clang {
class SourceManager;
class ExternalPreprocessorSource;
class FileManager;
class FileEntry;
class HeaderSearch;
class MemoryBufferCache;
class PragmaNamespace;
class PragmaHandler;
class CommentHandler;
class ScratchBuffer;
class TargetInfo;
class PPCallbacks;
class CodeCompletionHandler;
class DirectoryLookup;
class PreprocessingRecord;
class ModuleLoader;
class PTHManager;
class PreprocessorOptions;
/// \brief Stores token information for comparing actual tokens with
/// predefined values. Only handles simple tokens and identifiers.
class TokenValue {
tok::TokenKind Kind;
IdentifierInfo *II;
public:
TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
assert(Kind != tok::identifier &&
"Identifiers should be created by TokenValue(IdentifierInfo *)");
assert(!tok::isLiteral(Kind) && "Literals are not supported.");
assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
}
TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
bool operator==(const Token &Tok) const {
return Tok.getKind() == Kind &&
(!II || II == Tok.getIdentifierInfo());
}
};
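// A minimal sketch of matching a lexed token against expected values,
// assuming a Preprocessor &PP and a Token Tok:
//
//   TokenValue LParen(tok::l_paren);
//   TokenValue Pragma(PP.getIdentifierInfo("pragma"));
//   bool Matches = (LParen == Tok) || (Pragma == Tok);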
/// \brief Context in which macro name is used.
enum MacroUse {
MU_Other = 0, // other than #define or #undef
MU_Define = 1, // macro name specified in #define
MU_Undef = 2 // macro name specified in #undef
};
/// \brief Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
/// Lexers know only about tokens within a single source file, and don't
/// know anything about preprocessor-level issues like the \#include stack,
/// token expansion, etc.
class Preprocessor {
std::shared_ptr<PreprocessorOptions> PPOpts;
DiagnosticsEngine *Diags;
LangOptions &LangOpts;
const TargetInfo *Target;
const TargetInfo *AuxTarget;
FileManager &FileMgr;
SourceManager &SourceMgr;
MemoryBufferCache &PCMCache;
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
/// \brief External source of macros.
ExternalPreprocessorSource *ExternalSource;
/// An optional PTHManager object used for getting tokens from
/// a token cache rather than lexing the original source file.
std::unique_ptr<PTHManager> PTH;
/// A BumpPtrAllocator object used to quickly allocate and release
/// objects internal to the Preprocessor.
llvm::BumpPtrAllocator BP;
/// Identifiers for builtin macros and other builtins.
IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
IdentifierInfo *Ident__COUNTER__; // __COUNTER__
IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
IdentifierInfo *Ident__identifier; // __identifier
IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
IdentifierInfo *Ident__has_feature; // __has_feature
IdentifierInfo *Ident__has_extension; // __has_extension
IdentifierInfo *Ident__has_builtin; // __has_builtin
IdentifierInfo *Ident__has_attribute; // __has_attribute
IdentifierInfo *Ident__has_include; // __has_include
IdentifierInfo *Ident__has_include_next; // __has_include_next
IdentifierInfo *Ident__has_warning; // __has_warning
IdentifierInfo *Ident__is_identifier; // __is_identifier
IdentifierInfo *Ident__building_module; // __building_module
IdentifierInfo *Ident__MODULE__; // __MODULE__
IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
SourceLocation DATELoc, TIMELoc;
unsigned CounterValue; // Next __COUNTER__ value.
enum {
/// \brief Maximum depth of \#includes.
MaxAllowedIncludeStackDepth = 200
};
// State that is set before the preprocessor begins.
bool KeepComments : 1;
bool KeepMacroComments : 1;
bool SuppressIncludeNotFoundError : 1;
// State that changes while the preprocessor runs:
bool InMacroArgs : 1; // True if parsing fn macro invocation args.
/// Whether the preprocessor owns the header search object.
bool OwnsHeaderSearch : 1;
/// True if macro expansion is disabled.
bool DisableMacroExpansion : 1;
/// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
/// when parsing preprocessor directives.
bool MacroExpansionInDirectivesOverride : 1;
class ResetMacroExpansionHelper;
/// \brief Whether we have already loaded macros from the external source.
mutable bool ReadMacrosFromExternalSource : 1;
/// \brief True if pragmas are enabled.
bool PragmasEnabled : 1;
/// \brief True if the current build action is a preprocessing action.
bool PreprocessedOutput : 1;
/// \brief True if we are currently preprocessing a #if or #elif directive.
bool ParsingIfOrElifDirective;
/// \brief True if we are pre-expanding macro arguments.
bool InMacroArgPreExpansion;
/// \brief Mapping/lookup information for all identifiers in
/// the program, including program keywords.
mutable IdentifierTable Identifiers;
/// \brief This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()) makes it convenient to put it here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
SelectorTable Selectors;
/// \brief Information about builtins.
Builtin::Context BuiltinInfo;
/// \brief Tracks all of the pragmas that the client registered
/// with this preprocessor.
std::unique_ptr<PragmaNamespace> PragmaHandlers;
/// \brief Pragma handlers of the original source is stored here during the
/// parsing of a model file.
std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
/// \brief Tracks all of the comment handlers that the client registered
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;
/// \brief True if we want to ignore the EOF token and continue later on (thus
/// avoiding tearing down the Lexer, etc.).
bool IncrementalProcessing;
/// The kind of translation unit we are processing.
TranslationUnitKind TUKind;
/// \brief The code-completion handler.
CodeCompletionHandler *CodeComplete;
/// \brief The file that we're performing code-completion for, if any.
const FileEntry *CodeCompletionFile;
/// \brief The offset in file for the code-completion point.
unsigned CodeCompletionOffset;
/// \brief The location for the code-completion point. This gets instantiated
/// when the CodeCompletionFile gets \#include'ed for preprocessing.
SourceLocation CodeCompletionLoc;
/// \brief The start location for the file of the code-completion point.
///
/// This gets instantiated when the CodeCompletionFile gets \#include'ed
/// for preprocessing.
SourceLocation CodeCompletionFileLoc;
/// \brief The source location of the \c import contextual keyword we just
/// lexed, if any.
SourceLocation ModuleImportLoc;
/// \brief The module import path that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
/// \brief Whether the last token we lexed was an '@'.
bool LastTokenWasAt;
/// \brief Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier;
/// \brief The source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
SourceLocation PragmaARCCFCodeAuditedLoc;
/// \brief The source location of the currently-active
/// \#pragma clang assume_nonnull begin.
SourceLocation PragmaAssumeNonNullLoc;
/// \brief True if we hit the code-completion point.
bool CodeCompletionReached;
/// \brief The code completion token containing the information
/// on the stem that is to be code completed.
IdentifierInfo *CodeCompletionII;
/// \brief The directory that the main file should be considered to occupy,
/// if it does not correspond to a real file (as happens when building a
/// module).
const DirectoryEntry *MainFileDir;
/// \brief The number of bytes that we will initially skip when entering the
/// main file, along with a flag that indicates whether skipping this number
/// of bytes will place the lexer at the start of a line.
///
/// This is used when loading a precompiled preamble.
std::pair<int, bool> SkipMainFilePreamble;
class PreambleConditionalStackStore {
enum State {
Off = 0,
Recording = 1,
Replaying = 2,
};
public:
PreambleConditionalStackStore() : ConditionalStackState(Off) {}
void startRecording() { ConditionalStackState = Recording; }
void startReplaying() { ConditionalStackState = Replaying; }
bool isRecording() const { return ConditionalStackState == Recording; }
bool isReplaying() const { return ConditionalStackState == Replaying; }
ArrayRef<PPConditionalInfo> getStack() const {
return ConditionalStack;
}
void doneReplaying() {
ConditionalStack.clear();
ConditionalStackState = Off;
}
void setStack(ArrayRef<PPConditionalInfo> s) {
if (!isRecording() && !isReplaying())
return;
ConditionalStack.clear();
ConditionalStack.append(s.begin(), s.end());
}
bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
private:
SmallVector<PPConditionalInfo, 4> ConditionalStack;
State ConditionalStackState;
} PreambleConditionalStack;
/// \brief The current top of the stack that we're lexing from if
/// not expanding a macro and we are lexing directly from source code.
///
/// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
std::unique_ptr<Lexer> CurLexer;
/// \brief The current top of stack that we're lexing from if
/// not expanding from a macro and we are lexing from a PTH cache.
///
/// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
std::unique_ptr<PTHLexer> CurPTHLexer;
/// \brief The current top of the stack that we're lexing from
/// if not expanding a macro.
///
/// This is an alias for either CurLexer or CurPTHLexer.
PreprocessorLexer *CurPPLexer;
/// \brief Used to find the current FileEntry, if CurLexer is non-null
/// and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
const DirectoryLookup *CurDirLookup;
/// \brief The current macro we are expanding, if we are expanding a macro.
///
/// One of CurLexer and CurTokenLexer must be null.
std::unique_ptr<TokenLexer> CurTokenLexer;
/// \brief The kind of lexer we're currently working with.
enum CurLexerKind {
CLK_Lexer,
CLK_PTHLexer,
CLK_TokenLexer,
CLK_CachingLexer,
CLK_LexAfterModuleImport
} CurLexerKind;
/// \brief If the current lexer is for a submodule that is being built, this
/// is that submodule.
Module *CurLexerSubmodule;
/// \brief Keeps track of the stack of files currently
/// \#included, and macros currently being expanded from, not counting
/// CurLexer/CurTokenLexer.
struct IncludeStackInfo {
enum CurLexerKind CurLexerKind;
Module *TheSubmodule;
std::unique_ptr<Lexer> TheLexer;
std::unique_ptr<PTHLexer> ThePTHLexer;
PreprocessorLexer *ThePPLexer;
std::unique_ptr<TokenLexer> TheTokenLexer;
const DirectoryLookup *TheDirLookup;
// The following constructors are completely useless copies of the default
// versions, only needed to pacify MSVC.
IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
std::unique_ptr<Lexer> &&TheLexer,
std::unique_ptr<PTHLexer> &&ThePTHLexer,
PreprocessorLexer *ThePPLexer,
std::unique_ptr<TokenLexer> &&TheTokenLexer,
const DirectoryLookup *TheDirLookup)
: CurLexerKind(std::move(CurLexerKind)),
TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
ThePTHLexer(std::move(ThePTHLexer)),
ThePPLexer(std::move(ThePPLexer)),
TheTokenLexer(std::move(TheTokenLexer)),
TheDirLookup(std::move(TheDirLookup)) {}
};
std::vector<IncludeStackInfo> IncludeMacroStack;
/// \brief Actions invoked when some preprocessor activity is
/// encountered (e.g. a file is \#included, etc).
std::unique_ptr<PPCallbacks> Callbacks;
struct MacroExpandsInfo {
Token Tok;
MacroDefinition MD;
SourceRange Range;
MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
: Tok(Tok), MD(MD), Range(Range) { }
};
SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
/// Information about a name that has been used to define a module macro.
struct ModuleMacroInfo {
ModuleMacroInfo(MacroDirective *MD)
: MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}
/// The most recent macro directive for this identifier.
MacroDirective *MD;
/// The active module macros for this identifier.
llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
/// The generation number at which we last updated ActiveModuleMacros.
/// \see Preprocessor::VisibleModules.
unsigned ActiveModuleMacrosGeneration;
/// Whether this macro name is ambiguous.
bool IsAmbiguous;
/// The module macros that are overridden by this macro.
llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
};
/// The state of a macro for an identifier.
class MacroState {
mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
const IdentifierInfo *II) const {
if (II->isOutOfDate())
PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
// FIXME: Find a spare bit on IdentifierInfo and store a
// HasModuleMacros flag.
if (!II->hasMacroDefinition() ||
(!PP.getLangOpts().Modules &&
!PP.getLangOpts().ModulesLocalVisibility) ||
!PP.CurSubmoduleState->VisibleModules.getGeneration())
return nullptr;
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
Info->ActiveModuleMacrosGeneration)
PP.updateModuleMacroInfo(II, *Info);
return Info;
}
public:
MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective *MD) : State(MD) {}
MacroState(MacroState &&O) noexcept : State(O.State) {
O.State = (MacroDirective *)nullptr;
}
MacroState &operator=(MacroState &&O) noexcept {
auto S = O.State;
O.State = (MacroDirective *)nullptr;
State = S;
return *this;
}
~MacroState() {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->~ModuleMacroInfo();
}
MacroDirective *getLatest() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->MD;
return State.get<MacroDirective*>();
}
void setLatest(MacroDirective *MD) {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->MD = MD;
else
State = MD;
}
bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
auto *Info = getModuleInfo(PP, II);
return Info ? Info->IsAmbiguous : false;
}
ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
if (auto *Info = getModuleInfo(PP, II))
return Info->ActiveModuleMacros;
return None;
}
MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
SourceManager &SourceMgr) const {
// FIXME: Incorporate module macros into the result of this.
if (auto *Latest = getLatest())
return Latest->findDirectiveAtLoc(Loc, SourceMgr);
return MacroDirective::DefInfo();
}
void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
if (auto *Info = getModuleInfo(PP, II)) {
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Info->ActiveModuleMacros.begin(),
Info->ActiveModuleMacros.end());
Info->ActiveModuleMacros.clear();
Info->IsAmbiguous = false;
}
}
ArrayRef<ModuleMacro*> getOverriddenMacros() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->OverriddenMacros;
return None;
}
void setOverriddenMacros(Preprocessor &PP,
ArrayRef<ModuleMacro *> Overrides) {
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
if (Overrides.empty())
return;
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
Info->OverriddenMacros.clear();
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Overrides.begin(), Overrides.end());
Info->ActiveModuleMacrosGeneration = 0;
}
};
/// For each IdentifierInfo that was associated with a macro, we
/// keep a mapping to the history of all macro definitions and #undefs in
/// reverse order (the latest one is at the head of the list).
///
/// This mapping lives within the \p CurSubmoduleState.
typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;
friend class ASTReader;
struct SubmoduleState;
/// \brief Information about a submodule that we're currently building.
struct BuildingSubmoduleInfo {
BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
SubmoduleState *OuterSubmoduleState,
unsigned OuterPendingModuleMacroNames)
: M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
OuterSubmoduleState(OuterSubmoduleState),
OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
/// The module that we are building.
Module *M;
/// The location at which the module was included.
SourceLocation ImportLoc;
/// Whether we entered this submodule via a pragma.
bool IsPragma;
/// The previous SubmoduleState.
SubmoduleState *OuterSubmoduleState;
/// The number of pending module macro names when we started building this.
unsigned OuterPendingModuleMacroNames;
};
SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
/// \brief Information about a submodule's preprocessor state.
struct SubmoduleState {
/// The macros for the submodule.
MacroMap Macros;
/// The set of modules that are visible within the submodule.
VisibleModuleSet VisibleModules;
// FIXME: CounterValue?
// FIXME: PragmaPushMacroInfo?
};
std::map<Module*, SubmoduleState> Submodules;
/// The preprocessor state for preprocessing outside of any submodule.
SubmoduleState NullSubmoduleState;
/// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
SubmoduleState *CurSubmoduleState;
/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;
/// The names of potential module macros that we've not yet processed.
llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames;
/// The list of module macros, for each identifier, that are not overridden by
/// any other module macro.
llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
LeafModuleMacros;
/// \brief Macros that we want to warn about because they are unused at the
/// end of the translation unit.
///
/// We store just their SourceLocations instead of
/// something like MacroInfo*. The benefit of this is that when we are
/// deserializing from PCH, we don't need to deserialize identifiers and
/// macros just to report that they are unused; we just warn using
/// the SourceLocations in this set (which will be filled by the ASTReader).
/// We use a SmallPtrSet instead of a vector for faster removal.
typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
/// \brief A "freelist" of MacroArg objects that can be
/// reused for quick allocation.
MacroArgs *MacroArgCache;
friend class MacroArgs;
/// For each IdentifierInfo used in a \#pragma push_macro directive,
/// we keep a MacroInfo stack used to restore the previous macro value.
llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
// Various statistics we track for performance analysis.
unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
unsigned NumIf, NumElse, NumEndif;
unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
unsigned NumSkipped;
/// \brief The predefined macros that the preprocessor should use from the
/// command line, etc.
std::string Predefines;
/// \brief The file ID for the preprocessor predefines.
FileID PredefinesFileID;
/// \{
/// \brief Cache of macro expanders to reduce malloc traffic.
enum { TokenLexerCacheSize = 8 };
unsigned NumCachedTokenLexers;
std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
/// \}
/// \brief Keeps macro-expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro-expanded tokens that it
/// is going to lex into the cache, and when it finishes, the tokens are
/// removed from the end of the cache.
SmallVector<Token, 16> MacroExpandedTokens;
std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
/// \brief A record of the macro definitions and expansions that
/// occurred during preprocessing.
///
/// This is an optional side structure that can be enabled with
/// \c createPreprocessingRecord() prior to preprocessing.
PreprocessingRecord *Record;
/// Cached tokens state.
typedef SmallVector<Token, 1> CachedTokensTy;
/// \brief Cached tokens are stored here when we do backtracking or
/// lookahead. They are "lexed" by the CachingLex() method.
CachedTokensTy CachedTokens;
/// \brief The position of the cached token that CachingLex() should
/// "lex" next.
///
/// If it points beyond the CachedTokens vector, it means that a normal
/// Lex() should be invoked.
CachedTokensTy::size_type CachedLexPos;
/// \brief Stack of backtrack positions, allowing nested backtracks.
///
/// The EnableBacktrackAtThisPos() method pushes a position to
/// indicate where CachedLexPos should be set when the BackTrack() method is
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;
struct MacroInfoChain {
MacroInfo MI;
MacroInfoChain *Next;
};
/// MacroInfos are managed as a chain for easy disposal. This is the head
/// of that list.
MacroInfoChain *MIChainHead;
void updateOutOfDateIdentifier(IdentifierInfo &II) const;
public:
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
MemoryBufferCache &PCMCache,
HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup = nullptr,
bool OwnsHeaderSearch = false,
TranslationUnitKind TUKind = TU_Complete);
~Preprocessor();
/// \brief Initialize the preprocessor using information about the target.
///
/// \param Target is owned by the caller and must remain valid for the
/// lifetime of the preprocessor.
/// \param AuxTarget is owned by the caller and must remain valid for
/// the lifetime of the preprocessor.
void Initialize(const TargetInfo &Target,
const TargetInfo *AuxTarget = nullptr);
/// \brief Initialize the preprocessor to parse a model file.
///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However, to avoid duplicating some
/// information in the preprocessor, some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
void InitializeForModelFile();
/// \brief Clean up after model file parsing.
void FinalizeForModelFile();
/// \brief Retrieve the preprocessor options used to initialize this
/// preprocessor.
PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
DiagnosticsEngine &getDiagnostics() const { return *Diags; }
void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
const LangOptions &getLangOpts() const { return LangOpts; }
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
FileManager &getFileManager() const { return FileMgr; }
SourceManager &getSourceManager() const { return SourceMgr; }
MemoryBufferCache &getPCMCache() const { return PCMCache; }
HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
IdentifierTable &getIdentifierTable() { return Identifiers; }
const IdentifierTable &getIdentifierTable() const { return Identifiers; }
SelectorTable &getSelectorTable() { return Selectors; }
Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
void setPTHManager(PTHManager* pm);
PTHManager *getPTHManager() { return PTH.get(); }
void setExternalSource(ExternalPreprocessorSource *Source) {
ExternalSource = Source;
}
ExternalPreprocessorSource *getExternalSource() const {
return ExternalSource;
}
/// \brief Retrieve the module loader associated with this preprocessor.
ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
bool hadModuleLoaderFatalFailure() const {
return TheModuleLoader.HadFatalFailure;
}
/// \brief True if we are currently preprocessing a \#if or \#elif directive.
bool isParsingIfOrElifDirective() const {
return ParsingIfOrElifDirective;
}
/// \brief Control whether the preprocessor retains comments in output.
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
this->KeepComments = KeepComments | KeepMacroComments;
this->KeepMacroComments = KeepMacroComments;
}
bool getCommentRetentionState() const { return KeepComments; }
void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
bool getPragmasEnabled() const { return PragmasEnabled; }
void SetSuppressIncludeNotFoundError(bool Suppress) {
SuppressIncludeNotFoundError = Suppress;
}
bool GetSuppressIncludeNotFoundError() {
return SuppressIncludeNotFoundError;
}
/// Sets whether the preprocessor is responsible for producing output or if
/// it is producing tokens to be consumed by Parse and Sema.
void setPreprocessedOutput(bool IsPreprocessedOutput) {
PreprocessedOutput = IsPreprocessedOutput;
}
/// Returns true if the preprocessor is responsible for generating output,
/// false if it is producing tokens to be consumed by Parse and Sema.
bool isPreprocessedOutput() const { return PreprocessedOutput; }
/// \brief Return true if we are lexing directly from the specified lexer.
bool isCurrentLexer(const PreprocessorLexer *L) const {
return CurPPLexer == L;
}
/// \brief Return the current lexer being lexed from.
///
/// Note that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
/// \brief Return the current file lexer being lexed from.
///
/// Note that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *getCurrentFileLexer() const;
/// \brief Return the submodule owning the file being lexed. This may not be
/// the current module if we have changed modules since entering the file.
Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
/// \brief Returns the FileID for the preprocessor predefines.
FileID getPredefinesFileID() const { return PredefinesFileID; }
/// \{
/// \brief Accessors for preprocessor callbacks.
///
/// Note that this class takes ownership of any PPCallbacks object given to
/// it.
PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
if (Callbacks)
C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
std::move(Callbacks));
Callbacks = std::move(C);
}
/// \}
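/// Illustrative usage (a sketch, not part of the original header): a client
/// can observe preprocessor activity by registering a PPCallbacks subclass;
/// MyCallbacks is hypothetical.
/// \code
///   struct MyCallbacks : public PPCallbacks {
///     void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
///                       SourceRange Range, const MacroArgs *Args) override {
///       // Inspect each macro expansion here.
///     }
///   };
///   PP.addPPCallbacks(llvm::make_unique<MyCallbacks>());
/// \endcode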
bool isMacroDefined(StringRef Id) {
return isMacroDefined(&Identifiers.get(Id));
}
bool isMacroDefined(const IdentifierInfo *II) {
return II->hasMacroDefinition() &&
(!getLangOpts().Modules || (bool)getMacroDefinition(II));
}
/// \brief Determine whether II is defined as a macro within the module M,
/// if that is a module that we've already preprocessed. Does not check for
/// macros imported into M.
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
if (!II->hasMacroDefinition())
return false;
auto I = Submodules.find(M);
if (I == Submodules.end())
return false;
auto J = I->second.Macros.find(II);
if (J == I->second.Macros.end())
return false;
auto *MD = J->second.getLatest();
return MD && MD->isDefined();
}
MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
if (!II->hasMacroDefinition())
return MacroDefinition();
MacroState &S = CurSubmoduleState->Macros[II];
auto *MD = S.getLatest();
while (MD && isa<VisibilityMacroDirective>(MD))
MD = MD->getPrevious();
return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
S.getActiveModuleMacros(*this, II),
S.isAmbiguous(*this, II));
}
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
SourceLocation Loc) {
if (!II->hadMacroDefinition())
return MacroDefinition();
MacroState &S = CurSubmoduleState->Macros[II];
MacroDirective::DefInfo DI;
if (auto *MD = S.getLatest())
DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
// FIXME: Compute the set of active module macros at the specified location.
return MacroDefinition(DI.getDirective(),
S.getActiveModuleMacros(*this, II),
S.isAmbiguous(*this, II));
}
/// \brief Given an identifier, return its latest non-imported MacroDirective
/// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
if (!II->hasMacroDefinition())
return nullptr;
auto *MD = getLocalMacroDirectiveHistory(II);
if (!MD || MD->getDefinition().isUndefined())
return nullptr;
return MD;
}
const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
return const_cast<Preprocessor*>(this)->getMacroInfo(II);
}
MacroInfo *getMacroInfo(const IdentifierInfo *II) {
if (!II->hasMacroDefinition())
return nullptr;
if (auto MD = getMacroDefinition(II))
return MD.getMacroInfo();
return nullptr;
}
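/// Illustrative usage (a sketch, not part of the original header): querying
/// a macro by name, assuming a Preprocessor &PP; "MY_MACRO" is hypothetical.
/// \code
///   IdentifierInfo *II = PP.getIdentifierInfo("MY_MACRO");
///   if (PP.isMacroDefined(II))
///     if (const MacroInfo *MI = PP.getMacroInfo(II))
///       unsigned NumToks = MI->getNumTokens(); // inspect the definition
/// \endcode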
/// \brief Given an identifier, return the latest non-imported macro
/// directive for that identifier.
///
/// One can iterate over all previous macro directives from the most recent
/// one.
MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
/// \brief Add a directive to the macro directive history for this identifier.
void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
SourceLocation Loc) {
DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
appendMacroDirective(II, MD);
return MD;
}
DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
MacroInfo *MI) {
return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
}
/// \brief Set a MacroDirective that was loaded from a PCH file.
void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
MacroDirective *MD);
/// \brief Register an exported macro for a module and identifier.
ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
/// \brief Get the list of leaf (non-overridden) module macros for a name.
ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
if (II->isOutOfDate())
updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
auto I = LeafModuleMacros.find(II);
if (I != LeafModuleMacros.end())
return I->second;
return None;
}
/// \{
/// Iterators for the macro history table. Currently defined macros have
/// IdentifierInfo::hasMacroDefinition() set and an empty
/// MacroInfo::getUndefLoc() at the head of the list.
typedef MacroMap::const_iterator macro_iterator;
macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
macro_iterator macro_end(bool IncludeExternalMacros = true) const;
llvm::iterator_range<macro_iterator>
macros(bool IncludeExternalMacros = true) const {
return llvm::make_range(macro_begin(IncludeExternalMacros),
macro_end(IncludeExternalMacros));
}
/// \}
/// \brief Return the name of the macro defined before \p Loc that has
/// spelling \p Tokens. If there are multiple macros with the same spelling,
/// return the last one defined.
StringRef getLastMacroWithSpelling(SourceLocation Loc,
ArrayRef<TokenValue> Tokens) const;
const std::string &getPredefines() const { return Predefines; }
/// \brief Set the predefines for this Preprocessor.
///
/// These predefines are automatically injected when parsing the main file.
void setPredefines(const char *P) { Predefines = P; }
void setPredefines(StringRef P) { Predefines = P; }
/// Return information about the specified preprocessor
/// identifier token.
IdentifierInfo *getIdentifierInfo(StringRef Name) const {
return &Identifiers.get(Name);
}
/// \brief Add the specified pragma handler to this preprocessor.
///
/// If \p Namespace is non-null, then it is a token required to exist on the
/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
void AddPragmaHandler(PragmaHandler *Handler) {
AddPragmaHandler(StringRef(), Handler);
}
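/// Illustrative usage (a sketch, not part of the original header): handling
/// a hypothetical '#pragma clang my_pragma'; MyPragmaHandler and the pragma
/// name are invented for illustration.
/// \code
///   struct MyPragmaHandler : public PragmaHandler {
///     MyPragmaHandler() : PragmaHandler("my_pragma") {}
///     void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
///                       Token &FirstToken) override {
///       // Consume the pragma's tokens up to tok::eod here.
///     }
///   };
///   PP.AddPragmaHandler("clang", new MyPragmaHandler());
/// \endcode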
/// \brief Remove the specific pragma handler from this preprocessor.
///
/// If \p Namespace is non-null, then it should be the namespace that
/// \p Handler was added to. It is an error to remove a handler that
/// has not been registered.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
void RemovePragmaHandler(PragmaHandler *Handler) {
RemovePragmaHandler(StringRef(), Handler);
}
/// Install empty handlers for all pragmas (making them ignored).
void IgnorePragmas();
/// \brief Add the specified comment handler to the preprocessor.
void addCommentHandler(CommentHandler *Handler);
/// \brief Remove the specified comment handler.
///
/// It is an error to remove a handler that has not been registered.
void removeCommentHandler(CommentHandler *Handler);
/// \brief Set the code completion handler to the given object.
void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
CodeComplete = &Handler;
}
/// \brief Retrieve the current code-completion handler.
CodeCompletionHandler *getCodeCompletionHandler() const {
return CodeComplete;
}
/// \brief Clear out the code completion handler.
void clearCodeCompletionHandler() {
CodeComplete = nullptr;
}
/// \brief Hook used by the lexer to invoke the "natural language" code
/// completion point.
void CodeCompleteNaturalLanguage();
/// \brief Set the code completion token for filtering purposes.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
CodeCompletionII = Filter;
}
/// \brief Get the code completion token for filtering purposes.
StringRef getCodeCompletionFilter() {
if (CodeCompletionII)
return CodeCompletionII->getName();
return {};
}
/// \brief Retrieve the preprocessing record, or NULL if there is no
/// preprocessing record.
PreprocessingRecord *getPreprocessingRecord() const { return Record; }
/// \brief Create a new preprocessing record, which will keep track of
/// all macro expansions, macro definitions, etc.
void createPreprocessingRecord();
/// \brief Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
void EnterMainSourceFile();
- /// \brief After parser warm-up, initialize the conditional stack from
- /// the preamble.
- void replayPreambleConditionalStack();
-
/// \brief Inform the preprocessor callbacks that processing is complete.
void EndSourceFile();
/// \brief Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
///
/// Emits a diagnostic, doesn't enter the file, and returns true on error.
bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
SourceLocation Loc);
/// \brief Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
///
/// \param Args specifies the tokens input to a function-like macro.
/// \param ILEnd specifies the location of the ')' for a function-like macro
/// or the identifier for an object-like macro.
void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
MacroArgs *Args);
/// \brief Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If \p DisableMacroExpansion is true, tokens lexed from the token stream
/// will not be subject to further macro expansion. Otherwise, these tokens
/// will be re-macro-expanded when/if expansion is enabled.
///
/// If \p OwnsTokens is false, this method assumes that the specified stream
/// of tokens has a permanent owner somewhere, so they do not need to be
/// copied. If it is true, it assumes the array of tokens is allocated with
/// \c new[] and the Preprocessor will delete[] it.
private:
void EnterTokenStream(const Token *Toks, unsigned NumToks,
bool DisableMacroExpansion, bool OwnsTokens);
public:
void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
bool DisableMacroExpansion) {
EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
}
void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
}
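/// Illustrative usage (a sketch, not part of the original header): replaying
/// a caller-owned token buffer. Toks is hypothetical and must outlive the
/// replay, since the ArrayRef overload does not take ownership.
/// \code
///   SmallVector<Token, 4> Toks; // filled elsewhere by the caller
///   PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
/// \endcode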
/// \brief Pop the current lexer/macro expansion off the top of the lexer stack.
///
/// This should only be used in situations where the current state of the
/// top-of-stack lexer is known.
void RemoveTopOfLexerStack();
/// From the point that this method is called, and until
/// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
/// keeps track of the lexed tokens so that a subsequent Backtrack() call will
/// make the Preprocessor re-lex the same tokens.
///
/// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
/// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
/// be combined with the EnableBacktrackAtThisPos calls in reverse order.
///
/// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
/// tokens will continue indefinitely.
///
void EnableBacktrackAtThisPos();
/// \brief Disable the last EnableBacktrackAtThisPos call.
void CommitBacktrackedTokens();
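/// Illustrative usage (a sketch, not part of the original header): tentative
/// lexing with a paired commit/backtrack, assuming a Preprocessor &PP.
/// \code
///   PP.EnableBacktrackAtThisPos();
///   Token Tok;
///   PP.Lex(Tok);
///   if (Tok.is(tok::l_paren))
///     PP.CommitBacktrackedTokens(); // keep the tokens we lexed
///   else
///     PP.Backtrack();               // the same tokens will be re-lexed
/// \endcode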
struct CachedTokensRange {
CachedTokensTy::size_type Begin, End;
};
private:
/// \brief A range of cached tokens that should be erased after lexing
/// when backtracking requires the erasure of such cached tokens.
Optional<CachedTokensRange> CachedTokenRangeToErase;
public:
/// \brief Returns the range of cached tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
CachedTokensRange LastCachedTokenRange();
/// \brief Erase the range of cached tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
void EraseCachedTokens(CachedTokensRange TokenRange);
/// \brief Make Preprocessor re-lex the tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
void Backtrack();
/// \brief True if EnableBacktrackAtThisPos() was called and
/// caching of tokens is on.
bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
/// \brief Lex the next token for this preprocessor.
void Lex(Token &Result);
void LexAfterModuleImport(Token &Result);
void makeModuleVisible(Module *M, SourceLocation Loc);
SourceLocation getModuleImportLoc(Module *M) const {
return CurSubmoduleState->VisibleModules.getImportLoc(M);
}
/// \brief Lex a string literal, which may be the concatenation of multiple
/// string literals and may even come from macro expansion.
/// \returns true on success, false if an error diagnostic has been generated.
bool LexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag, bool AllowMacroExpansion) {
if (AllowMacroExpansion)
Lex(Result);
else
LexUnexpandedToken(Result);
return FinishLexStringLiteral(Result, String, DiagnosticTag,
AllowMacroExpansion);
}
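/// Illustrative usage (a sketch, not part of the original header); the
/// diagnostic tag here is hypothetical.
/// \code
///   Token Tok;
///   std::string Str;
///   if (PP.LexStringLiteral(Tok, Str, "pragma message",
///                           /*AllowMacroExpansion=*/true)) {
///     // Str now holds the contents of the (possibly concatenated) literal.
///   }
/// \endcode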
/// \brief Complete the lexing of a string literal where the first token has
/// already been lexed (see LexStringLiteral).
bool FinishLexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag,
bool AllowMacroExpansion);
/// \brief Lex a token. If it's a comment, keep lexing until we get
/// something not a comment.
///
/// This is useful in -E -C mode where comments would foul up preprocessor
/// directive handling.
void LexNonComment(Token &Result) {
do
Lex(Result);
while (Result.getKind() == tok::comment);
}
/// \brief Just like Lex, but disables macro expansion of identifier tokens.
void LexUnexpandedToken(Token &Result) {
// Disable macro expansion.
bool OldVal = DisableMacroExpansion;
DisableMacroExpansion = true;
// Lex the token.
Lex(Result);
// Reenable it.
DisableMacroExpansion = OldVal;
}
/// \brief Like LexNonComment, but this disables macro expansion of
/// identifier tokens.
void LexUnexpandedNonComment(Token &Result) {
do
LexUnexpandedToken(Result);
while (Result.getKind() == tok::comment);
}
/// \brief Parses a simple integer literal to get its numeric value. Floating
/// point literals and user defined literals are rejected. Used primarily to
/// handle pragmas that accept integer arguments.
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
/// Disables macro expansion everywhere except for preprocessor directives.
void SetMacroExpansionOnlyInDirectives() {
DisableMacroExpansion = true;
MacroExpansionInDirectivesOverride = true;
}
/// \brief Peeks ahead N tokens and returns that token without consuming any
/// tokens.
///
/// LookAhead(0) returns the next token that would be returned by Lex(),
/// LookAhead(1) returns the token after it, etc. This returns normal
/// tokens after phase 5. As such, it is equivalent to using
/// 'Lex', not 'LexUnexpandedToken'.
const Token &LookAhead(unsigned N) {
if (CachedLexPos + N < CachedTokens.size())
return CachedTokens[CachedLexPos+N];
else
return PeekAhead(N+1);
}
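/// Illustrative usage (a sketch, not part of the original header): peek at
/// the next token without consuming anything.
/// \code
///   const Token &Next = PP.LookAhead(0);
///   if (Next.is(tok::comma)) {
///     // A comma follows; no token has been consumed yet.
///   }
/// \endcode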
/// \brief When backtracking is enabled and tokens are cached,
/// this allows reverting a specific number of tokens.
///
/// Note that the number of tokens being reverted should be up to the last
/// backtrack position, not more.
void RevertCachedTokens(unsigned N) {
assert(isBacktrackEnabled() &&
"Should only be called when tokens are cached for backtracking");
assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
&& "Should revert tokens up to the last backtrack position, not more");
assert(signed(CachedLexPos) - signed(N) >= 0 &&
"Corrupted backtrack positions ?");
CachedLexPos -= N;
}
/// \brief Enters a token in the token stream to be lexed next.
///
/// If BackTrack() is called afterwards, the token will remain at the
/// insertion point.
void EnterToken(const Token &Tok) {
EnterCachingLexMode();
CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
}
/// We notify the Preprocessor that if it is caching tokens (because
/// backtrack is enabled) it should replace the most recent cached tokens
/// with the given annotation token. This function has no effect if
/// backtracking is not enabled.
///
/// Note that the use of this function is just for optimization, so that the
/// cached tokens don't get re-parsed and re-resolved after a backtrack is
/// invoked.
void AnnotateCachedTokens(const Token &Tok) {
assert(Tok.isAnnotation() && "Expected annotation token");
if (CachedLexPos != 0 && isBacktrackEnabled())
AnnotatePreviousCachedTokens(Tok);
}
/// Get the location of the last cached token, suitable for setting the end
/// location of an annotation token.
SourceLocation getLastCachedTokenLocation() const {
assert(CachedLexPos != 0);
return CachedTokens[CachedLexPos-1].getLastLoc();
}
/// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
/// CachedTokens.
bool IsPreviousCachedToken(const Token &Tok) const;
/// \brief Replace the token at `CachedLexPos - 1` in CachedTokens by the
/// tokens in \p NewToks.
///
/// Useful when a token needs to be split into smaller ones and CachedTokens'
/// most recent token must be updated to reflect that.
void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
/// \brief Replace the last token with an annotation token.
///
/// Like AnnotateCachedTokens(), this routine replaces an
/// already-parsed (and resolved) token with an annotation
/// token. However, this routine only replaces the last token with
/// the annotation token; it does not affect any other cached
/// tokens. This function has no effect if backtracking is not
/// enabled.
void ReplaceLastTokenWithAnnotation(const Token &Tok) {
assert(Tok.isAnnotation() && "Expected annotation token");
if (CachedLexPos != 0 && isBacktrackEnabled())
CachedTokens[CachedLexPos-1] = Tok;
}
/// Enter an annotation token into the token stream.
void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
void *AnnotationVal);
/// Update the current token to represent the provided
/// identifier, in order to cache an action performed by typo correction.
void TypoCorrectToken(const Token &Tok) {
assert(Tok.getIdentifierInfo() && "Expected identifier token");
if (CachedLexPos != 0 && isBacktrackEnabled())
CachedTokens[CachedLexPos-1] = Tok;
}
/// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
/// CurTokenLexer pointers.
void recomputeCurLexerKind();
/// \brief Returns true if incremental processing is enabled.
bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
/// \brief Enable or disable incremental processing.
void enableIncrementalProcessing(bool value = true) {
IncrementalProcessing = value;
}
/// \brief Specify the point at which code-completion will be performed.
///
/// \param File the file in which code completion should occur. If
/// this file is included multiple times, completion will be performed the
/// first time it is included. If NULL, this function clears out the
/// code-completion point.
///
/// \param Line the line at which code completion should occur
/// (1-based).
///
/// \param Column the column at which code completion should occur
/// (1-based).
///
/// \returns true if an error occurred, false otherwise.
bool SetCodeCompletionPoint(const FileEntry *File,
unsigned Line, unsigned Column);
/// \brief Determine if we are performing code completion.
bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
/// \brief Returns the location of the code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
/// \brief Returns the start location of the file of code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionFileLoc() const {
return CodeCompletionFileLoc;
}
/// \brief Returns true if code-completion is enabled and we have hit the
/// code-completion point.
bool isCodeCompletionReached() const { return CodeCompletionReached; }
/// \brief Note that we hit the code-completion point.
void setCodeCompletionReached() {
assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
CodeCompletionReached = true;
// Silence any diagnostics that occur after we hit the code-completion.
getDiagnostics().setSuppressAllDiagnostics(true);
}
/// \brief The location of the currently-active \#pragma clang
/// arc_cf_code_audited begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaARCCFCodeAuditedLoc() const {
return PragmaARCCFCodeAuditedLoc;
}
/// \brief Set the location of the currently-active \#pragma clang
/// arc_cf_code_audited begin. An invalid location ends the pragma.
void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
PragmaARCCFCodeAuditedLoc = Loc;
}
/// \brief The location of the currently-active \#pragma clang
/// assume_nonnull begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaAssumeNonNullLoc() const {
return PragmaAssumeNonNullLoc;
}
/// \brief Set the location of the currently-active \#pragma clang
/// assume_nonnull begin. An invalid location ends the pragma.
void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
PragmaAssumeNonNullLoc = Loc;
}
/// \brief Set the directory in which the main file should be considered
/// to have been found, if it is not a real file.
void setMainFileDir(const DirectoryEntry *Dir) {
MainFileDir = Dir;
}
/// \brief Instruct the preprocessor to skip part of the main source file.
///
/// \param Bytes The number of bytes in the preamble to skip.
///
/// \param StartOfLine Whether skipping these bytes puts the lexer at the
/// start of a line.
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
SkipMainFilePreamble.first = Bytes;
SkipMainFilePreamble.second = StartOfLine;
}
/// Forwarding function for diagnostics. This emits a diagnostic at
/// the specified Token's location, translating the token's start
/// position in the current buffer into a SourcePosition object for rendering.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
return Diags->Report(Loc, DiagID);
}
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
return Diags->Report(Tok.getLocation(), DiagID);
}
/// Return the 'spelling' of the token at the given
/// location; does not go up to the spelling location or down to the
/// expansion location.
///
/// \param buffer A buffer which will be used only if the token requires
/// "cleaning", e.g. if it contains trigraphs or escaped newlines
/// \param invalid If non-null, will be set \c true if an error occurs.
StringRef getSpelling(SourceLocation loc,
SmallVectorImpl<char> &buffer,
bool *invalid = nullptr) const {
return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
}
/// \brief Return the 'spelling' of the Tok token.
///
/// The spelling of a token is the characters used to represent the token in
/// the source file after trigraph expansion and escaped-newline folding. In
/// particular, this wants to get the true, uncanonicalized, spelling of
/// things like digraphs, UCNs, etc.
///
/// \param Invalid If non-null, will be set \c true if an error occurs.
std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
}
/// \brief Get the spelling of a token into a preallocated buffer, instead
/// of as an std::string.
///
/// The caller is required to allocate enough space for the token, which is
/// guaranteed to be at least Tok.getLength() bytes long. The length of the
/// actual result is returned.
///
/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may *change the input pointer*
/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
unsigned getSpelling(const Token &Tok, const char *&Buffer,
bool *Invalid = nullptr) const {
return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
}
/// \brief Get the spelling of a token into a SmallVector.
///
/// Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
StringRef getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid = nullptr) const;
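/// Illustrative usage (a sketch, not part of the original header):
/// \code
///   SmallString<64> Buffer;
///   StringRef Spelling = PP.getSpelling(Tok, Buffer);
///   // Spelling may point into Buffer or directly into the source buffer.
/// \endcode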
/// \brief Relex the token at the specified location.
/// \returns true if there was a failure, false on success.
bool getRawToken(SourceLocation Loc, Token &Result,
bool IgnoreWhiteSpace = false) {
return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
}
/// \brief Given a Token \p Tok that is a numeric constant with length 1,
/// return the character.
char
getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
bool *Invalid = nullptr) const {
assert(Tok.is(tok::numeric_constant) &&
Tok.getLength() == 1 && "Called on unsupported token");
assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
// If the token is carrying a literal data pointer, just use it.
if (const char *D = Tok.getLiteralData())
return *D;
// Otherwise, fall back on getCharacterData, which is slower, but always
// works.
return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
}
/// \brief Retrieve the name of the immediate macro expansion.
///
/// This routine starts from a source location, and finds the name of the
/// macro responsible for its immediate expansion. It looks through any
/// intervening macro argument expansions to compute this. It returns a
/// StringRef that refers to the SourceManager-owned buffer of the source
/// where that macro name is spelled. Thus, the result shouldn't out-live
/// the SourceManager.
StringRef getImmediateMacroName(SourceLocation Loc) {
return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
}
/// \brief Plop the specified string into a scratch buffer and set the
/// specified token's location and length to it.
///
/// If specified, the source location provides a location of the expansion
/// point of the token.
void CreateString(StringRef Str, Token &Tok,
SourceLocation ExpansionLocStart = SourceLocation(),
SourceLocation ExpansionLocEnd = SourceLocation());
/// \brief Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different from what it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to. The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
}
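/// Illustrative usage (a sketch, not part of the original header); DiagID is
/// a hypothetical diagnostic ID.
/// \code
///   SourceLocation After = PP.getLocForEndOfToken(Tok.getLocation());
///   if (After.isValid())
///     PP.Diag(After, DiagID); // point just past the offending token
/// \endcode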
/// \brief Returns true if the given MacroID location points at the first
/// token of the macro expansion.
///
/// \param MacroBegin If non-null and the function returns true, it is set to
/// the begin location of the macro.
bool isAtStartOfMacroExpansion(SourceLocation loc,
SourceLocation *MacroBegin = nullptr) const {
return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
MacroBegin);
}
/// \brief Returns true if the given MacroID location points at the last
/// token of the macro expansion.
///
/// \param MacroEnd If non-null and the function returns true, it is set to
/// the end location of the macro.
bool isAtEndOfMacroExpansion(SourceLocation loc,
SourceLocation *MacroEnd = nullptr) const {
return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
}
/// \brief Print the token to stderr, used for debugging.
void DumpToken(const Token &Tok, bool DumpFlags = false) const;
void DumpLocation(SourceLocation Loc) const;
void DumpMacro(const MacroInfo &MI) const;
void dumpMacroInfo(const IdentifierInfo *II);
/// \brief Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned Char) const {
return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
}
/// \brief Increment the counters for the number of token paste operations
/// performed.
///
/// If fast was specified, this is a 'fast paste' case we handled.
void IncrementPasteCounter(bool isFast) {
if (isFast)
++NumFastTokenPaste;
else
++NumTokenPaste;
}
void PrintStats();
size_t getTotalMemory() const;
/// When the macro expander pastes together a comment (/##/) in Microsoft
/// mode, this method handles updating the current state, returning the
/// token on the next source line.
void HandleMicrosoftCommentPaste(Token &Tok);
//===--------------------------------------------------------------------===//
// Preprocessor callback methods. These are invoked by a lexer as various
// directives and events are found.
/// Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
private:
llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
public:
/// \brief Specifies the reason for poisoning an identifier.
///
/// If that identifier is accessed while poisoned, then this reason will be
/// used instead of the default "poisoned" diagnostic.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
/// \brief Display reason for poisoned identifier.
void HandlePoisonedIdentifier(Token & Tok);
void MaybeHandlePoisonedIdentifier(Token & Identifier) {
if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
if(II->isPoisoned()) {
HandlePoisonedIdentifier(Identifier);
}
}
}
private:
/// Identifiers used for SEH handling in Borland. These are only
/// allowed in particular circumstances.
// __except block
IdentifierInfo *Ident__exception_code,
*Ident___exception_code,
*Ident_GetExceptionCode;
// __except filter expression
IdentifierInfo *Ident__exception_info,
*Ident___exception_info,
*Ident_GetExceptionInfo;
// __finally
IdentifierInfo *Ident__abnormal_termination,
*Ident___abnormal_termination,
*Ident_AbnormalTermination;
const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
public:
void PoisonSEHIdentifiers(bool Poison = true); // Borland
/// \brief Callback invoked when the lexer reads an identifier and has
/// filled in the token's IdentifierInfo member.
///
/// This callback potentially macro expands it or turns it into a named
/// token (like 'for').
///
/// \returns true if we actually computed a token, false if we need to
/// lex again.
bool HandleIdentifier(Token &Identifier);
/// \brief Callback invoked when the lexer hits the end of the current file.
///
/// This either returns the EOF token and returns true, or
/// pops a level off the include stack and returns false, at which point the
/// client should call lex again.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
/// \brief Callback invoked when the current TokenLexer hits the end of its
/// token stream.
bool HandleEndOfTokenLexer(Token &Result);
/// \brief Callback invoked when the lexer sees a # token at the start of a
/// line.
///
/// This consumes the directive, modifies the lexer/preprocessor state, and
/// advances the lexer(s) so that the next token read is the correct one.
void HandleDirective(Token &Result);
/// \brief Ensure that the next token is a tok::eod token.
///
/// If not, emit a diagnostic and consume up until the eod.
/// If \p EnableMacros is true, then we consider macros that expand to zero
/// tokens as being ok.
void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
/// \brief Read and discard all tokens remaining on the current line until
/// the tok::eod token is found.
void DiscardUntilEndOfDirective();
/// \brief Returns true if the preprocessor has seen a use of
/// __DATE__ or __TIME__ in the file so far.
bool SawDateOrTime() const {
return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
}
unsigned getCounterValue() const { return CounterValue; }
void setCounterValue(unsigned V) { CounterValue = V; }
/// \brief Retrieves the module that we're currently building, if any.
Module *getCurrentModule();
/// \brief Allocate a new MacroInfo object with the provided SourceLocation.
MacroInfo *AllocateMacroInfo(SourceLocation L);
/// \brief Turn the specified lexer token into a fully checked and spelled
/// filename, e.g. as an operand of \#include.
///
/// The caller is expected to provide a buffer that is large enough to hold
/// the spelling of the filename, but is also expected to handle the case
/// when this method decides to use a different buffer.
///
/// \returns true if the input filename was in <>'s or false if it was
/// in ""'s.
bool GetIncludeFilenameSpelling(SourceLocation Loc, StringRef &Filename);
/// \brief Given a "foo" or \<foo> reference, look up the indicated file.
///
/// Returns null on failure. \p isAngled indicates whether the file
/// reference is for system \#include's or not (i.e. using <> instead of "").
const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
bool isAngled, const DirectoryLookup *FromDir,
const FileEntry *FromFile,
const DirectoryLookup *&CurDir,
SmallVectorImpl<char> *SearchPath,
SmallVectorImpl<char> *RelativePath,
ModuleMap::KnownHeader *SuggestedModule,
bool *IsMapped, bool SkipCache = false);
/// \brief Get the DirectoryLookup structure used to find the current
/// FileEntry, if CurLexer is non-null and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
/// \brief Return true if we're in the top-level file, not in a \#include.
bool isInPrimaryFile() const;
/// \brief Handle cases where the \#include name is expanded
/// from a macro as multiple tokens, which need to be glued together.
///
/// This occurs for code like:
/// \code
/// \#define FOO <x/y.h>
/// \#include FOO
/// \endcode
/// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
///
/// This code concatenates and consumes tokens up to the '>' token. It
/// returns false if the '>' was found; otherwise it returns true if it finds
/// and consumes the EOD marker.
bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
SourceLocation &End);
/// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
/// followed by EOD. Return true if the token is not a valid on-off-switch.
bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
bool *ShadowFlag = nullptr);
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
Module *LeaveSubmodule(bool ForPragma);
private:
void PushIncludeMacroStack() {
assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
std::move(CurLexer), std::move(CurPTHLexer),
CurPPLexer, std::move(CurTokenLexer),
CurDirLookup);
CurPPLexer = nullptr;
}
void PopIncludeMacroStack() {
CurLexer = std::move(IncludeMacroStack.back().TheLexer);
CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
CurPPLexer = IncludeMacroStack.back().ThePPLexer;
CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
CurDirLookup = IncludeMacroStack.back().TheDirLookup;
CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
CurLexerKind = IncludeMacroStack.back().CurLexerKind;
IncludeMacroStack.pop_back();
}
void PropagateLineStartLeadingSpaceInfo(Token &Result);
/// Determine whether we need to create module macros for #defines in the
/// current context.
bool needModuleMacros() const;
/// Update the set of active module macros and ambiguity flag for a module
/// macro name.
void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
SourceLocation Loc);
UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
bool isPublic);
/// \brief Lex and validate a macro name, which occurs after a
/// \#define or \#undef.
///
/// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag Points to flag that is set if macro name shadows
/// a keyword.
///
/// This emits a diagnostic, sets the token kind to eod,
/// and discards the rest of the macro line if the macro name is invalid.
void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
bool *ShadowFlag = nullptr);
/// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
/// entire line) of the macro's tokens and adds them to MacroInfo, and while
/// doing so performs certain validity checks including (but not limited to):
/// - # (stringization) is followed by a macro parameter
/// \param MacroNameTok - Token that represents the macro name
/// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
///
/// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
/// returns a nullptr if an invalid sequence of tokens is encountered.
MacroInfo *ReadOptionalMacroParameterListAndBody(
const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
/// The ( starting an argument list of a macro definition has just been read.
/// Lex the rest of the parameters and the closing ), updating \p MI with
/// what we learn and saving in \p LastTok the last token read.
/// Return true if an error occurs parsing the arg list.
bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file. Lex the rest of the file, until we see an \#endif. If \p
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered. If
/// \p FoundElse is false, then \#else directives are ok, if not, then we have
/// already seen one so a \#else directive is a duplicate. When this returns,
/// the caller can lex the first valid token.
void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
bool FoundNonSkipPortion, bool FoundElse,
SourceLocation ElseLoc = SourceLocation());
/// \brief A fast PTH version of SkipExcludedConditionalBlock.
void PTHSkipExcludedConditionalBlock();
/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
/// Whether the expression was evaluated as true or not.
bool Conditional;
/// True if the expression contained identifiers that were undefined.
bool IncludedUndefinedIds;
};
/// \brief Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
/// \brief Install the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();
/// \brief Register builtin macros such as __LINE__ with the identifier table.
void RegisterBuiltinMacros();
/// If an identifier token is read that is to be expanded as a macro, handle
/// it and return the next token as 'Tok'. If we lexed a token, return true;
/// otherwise the caller should lex again.
bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD);
/// \brief Cache macro-expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro-expanded tokens that it
/// is going to lex into the cache, and when it finishes, the tokens are
/// removed from the end of the cache.
Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
ArrayRef<Token> tokens);
void removeCachedMacroExpandedTokensOfLastLexer();
friend void TokenLexer::ExpandFunctionArguments();
/// Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();
/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation. Returns null on error.
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
SourceLocation &ExpansionEnd);
/// \brief If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void ExpandBuiltinMacro(Token &Tok);
/// \brief Read a \c _Pragma directive, slice it up, process it, then
/// return the first token after the directive.
/// This assumes that the \c _Pragma token has just been read into \p Tok.
void Handle_Pragma(Token &Tok);
/// \brief Like Handle_Pragma except the pragma text is not enclosed within
/// a string literal.
void HandleMicrosoft__pragma(Token &Tok);
/// \brief Add a lexer to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
/// \brief Add a lexer to the top of the include stack and
/// start getting tokens from it using the PTH cache.
void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
/// \brief Set the FileID for the preprocessor predefines.
void setPredefinesFileID(FileID FID) {
assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
PredefinesFileID = FID;
}
/// \brief Returns true if we are lexing from a file and not a
/// pragma or a macro.
static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
return L ? !L->isPragmaLexer() : P != nullptr;
}
static bool IsFileLexer(const IncludeStackInfo& I) {
return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
}
bool IsFileLexer() const {
return IsFileLexer(CurLexer.get(), CurPPLexer);
}
//===--------------------------------------------------------------------===//
// Caching stuff.
void CachingLex(Token &Result);
bool InCachingLexMode() const {
// If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
// that we are past EOF, not that we are in CachingLex mode.
return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
!IncludeMacroStack.empty();
}
void EnterCachingLexMode();
void ExitCachingLexMode() {
if (InCachingLexMode())
RemoveTopOfLexerStack();
}
const Token &PeekAhead(unsigned N);
void AnnotatePreviousCachedTokens(const Token &Tok);
//===--------------------------------------------------------------------===//
/// Handle*Directive - implement the various preprocessor directives. These
/// should side-effect the current preprocessor object so that the next call
/// to Lex() will return the appropriate token next.
void HandleLineDirective();
void HandleDigitDirective(Token &Tok);
void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
void HandleIdentSCCSDirective(Token &Tok);
void HandleMacroPublicDirective(Token &Tok);
void HandleMacroPrivateDirective();
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc,
Token &Tok,
const DirectoryLookup *LookupFrom = nullptr,
const FileEntry *LookupFromFile = nullptr,
bool isImport = false);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);
public:
/// Check that the given module is available, producing a diagnostic if not.
/// \return \c true if the check failed (because the module is not available).
/// \c false if the module appears to be usable.
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
const TargetInfo &TargetInfo,
DiagnosticsEngine &Diags, Module *M);
// Module inclusion testing.
/// \brief Find the module that owns the source or header file that
/// \p Loc points to. If the location is in a file that was included
/// into a module, or is outside any module, returns nullptr.
Module *getModuleForLocation(SourceLocation Loc);
/// \brief We want to produce a diagnostic at location IncLoc concerning a
/// missing module import.
///
/// \param IncLoc The location at which the missing import was detected.
/// \param M The desired module.
/// \param MLoc A location within the desired module at which some desired
/// effect occurred (eg, where a desired entity was declared).
///
/// \return A file that can be #included to import a module containing MLoc.
/// Null if no such file could be determined or if a #include is not
/// appropriate.
const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
Module *M,
SourceLocation MLoc);
bool isRecordingPreamble() const {
return PreambleConditionalStack.isRecording();
}
bool hasRecordedPreamble() const {
return PreambleConditionalStack.hasRecordedPreamble();
}
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
return PreambleConditionalStack.getStack();
}
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
PreambleConditionalStack.setStack(s);
}
void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
PreambleConditionalStack.startReplaying();
PreambleConditionalStack.setStack(s);
}
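// Illustrative sketch (hypothetical client code) of the record/replay API
// above: a precompiled-preamble builder saves the #if regions still open at
// the end of the preamble and hands them to the preprocessor that resumes
// lexing after it.
//
//   ArrayRef<PPConditionalInfo> Saved;
//   if (BuildingPP.hasRecordedPreamble())
//     Saved = BuildingPP.getPreambleConditionalStack();
//   ReusingPP.setReplayablePreambleConditionalStack(Saved);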
private:
+ /// \brief After processing the predefines file, initialize the conditional
+ /// stack from the preamble.
+ void replayPreambleConditionalStack();
+
// Macro handling.
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
void HandleUndefDirective();
// Conditional Inclusion.
void HandleIfdefDirective(Token &Tok, bool isIfndef,
bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
void HandleEndifDirective(Token &Tok);
void HandleElseDirective(Token &Tok);
void HandleElifDirective(Token &Tok);
// Pragmas.
void HandlePragmaDirective(SourceLocation IntroducerLoc,
PragmaIntroducerKind Introducer);
public:
void HandlePragmaOnce(Token &OnceTok);
void HandlePragmaMark();
void HandlePragmaPoison();
void HandlePragmaSystemHeader(Token &SysHeaderTok);
void HandlePragmaDependency(Token &DependencyTok);
void HandlePragmaPushMacro(Token &Tok);
void HandlePragmaPopMacro(Token &Tok);
void HandlePragmaIncludeAlias(Token &Tok);
void HandlePragmaModuleBuild(Token &Tok);
IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
// Return true and store the first token only if any CommentHandler
// has inserted some tokens and getCommentRetentionState() is false.
bool HandleComment(Token &Token, SourceRange Comment);
/// \brief A macro is used, update information about macros that need unused
/// warnings.
void markMacroAsUsed(MacroInfo *MI);
};
/// \brief Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
virtual ~CommentHandler();
// The handler shall return true if it has pushed any tokens
// to be read using e.g. EnterToken or EnterTokenStream.
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
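// Illustrative sketch, not part of this header: a minimal CommentHandler that
// merely observes comments. It returns false because it never pushes tokens
// back into the preprocessor; a real client would register it with the
// Preprocessor's addCommentHandler().
//
//   class LoggingCommentHandler : public CommentHandler {
//   public:
//     bool HandleComment(Preprocessor &PP, SourceRange Comment) override {
//       // Inspect Comment's source range here. Returning false means "no
//       // tokens were inserted", so lexing continues normally.
//       return false;
//     }
//   };
//   LoggingCommentHandler Handler;
//   PP.addCommentHandler(&Handler);  // PP is a hypothetical Preprocessor &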
/// \brief Registry of pragma handlers added by plugins
typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry;
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp (revision 322855)
@@ -1,6249 +1,6254 @@
//===--- ASTImporter.cpp - Importing ASTs from other Contexts ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTImporter class which imports AST nodes from one
// context into another context.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTImporter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/ASTStructuralEquivalence.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/MemoryBuffer.h"
#include <deque>
namespace clang {
class ASTNodeImporter : public TypeVisitor<ASTNodeImporter, QualType>,
public DeclVisitor<ASTNodeImporter, Decl *>,
public StmtVisitor<ASTNodeImporter, Stmt *> {
ASTImporter &Importer;
public:
explicit ASTNodeImporter(ASTImporter &Importer) : Importer(Importer) { }
using TypeVisitor<ASTNodeImporter, QualType>::Visit;
using DeclVisitor<ASTNodeImporter, Decl *>::Visit;
using StmtVisitor<ASTNodeImporter, Stmt *>::Visit;
// Importing types
QualType VisitType(const Type *T);
QualType VisitAtomicType(const AtomicType *T);
QualType VisitBuiltinType(const BuiltinType *T);
QualType VisitDecayedType(const DecayedType *T);
QualType VisitComplexType(const ComplexType *T);
QualType VisitPointerType(const PointerType *T);
QualType VisitBlockPointerType(const BlockPointerType *T);
QualType VisitLValueReferenceType(const LValueReferenceType *T);
QualType VisitRValueReferenceType(const RValueReferenceType *T);
QualType VisitMemberPointerType(const MemberPointerType *T);
QualType VisitConstantArrayType(const ConstantArrayType *T);
QualType VisitIncompleteArrayType(const IncompleteArrayType *T);
QualType VisitVariableArrayType(const VariableArrayType *T);
// FIXME: DependentSizedArrayType
// FIXME: DependentSizedExtVectorType
QualType VisitVectorType(const VectorType *T);
QualType VisitExtVectorType(const ExtVectorType *T);
QualType VisitFunctionNoProtoType(const FunctionNoProtoType *T);
QualType VisitFunctionProtoType(const FunctionProtoType *T);
// FIXME: UnresolvedUsingType
QualType VisitParenType(const ParenType *T);
QualType VisitTypedefType(const TypedefType *T);
QualType VisitTypeOfExprType(const TypeOfExprType *T);
// FIXME: DependentTypeOfExprType
QualType VisitTypeOfType(const TypeOfType *T);
QualType VisitDecltypeType(const DecltypeType *T);
QualType VisitUnaryTransformType(const UnaryTransformType *T);
QualType VisitAutoType(const AutoType *T);
QualType VisitInjectedClassNameType(const InjectedClassNameType *T);
// FIXME: DependentDecltypeType
QualType VisitRecordType(const RecordType *T);
QualType VisitEnumType(const EnumType *T);
QualType VisitAttributedType(const AttributedType *T);
QualType VisitTemplateTypeParmType(const TemplateTypeParmType *T);
QualType VisitSubstTemplateTypeParmType(const SubstTemplateTypeParmType *T);
QualType VisitTemplateSpecializationType(const TemplateSpecializationType *T);
QualType VisitElaboratedType(const ElaboratedType *T);
// FIXME: DependentNameType
// FIXME: DependentTemplateSpecializationType
QualType VisitObjCInterfaceType(const ObjCInterfaceType *T);
QualType VisitObjCObjectType(const ObjCObjectType *T);
QualType VisitObjCObjectPointerType(const ObjCObjectPointerType *T);
// Importing declarations
bool ImportDeclParts(NamedDecl *D, DeclContext *&DC,
DeclContext *&LexicalDC, DeclarationName &Name,
NamedDecl *&ToD, SourceLocation &Loc);
void ImportDefinitionIfNeeded(Decl *FromD, Decl *ToD = nullptr);
void ImportDeclarationNameLoc(const DeclarationNameInfo &From,
DeclarationNameInfo& To);
void ImportDeclContext(DeclContext *FromDC, bool ForceImport = false);
bool ImportCastPath(CastExpr *E, CXXCastPath &Path);
typedef DesignatedInitExpr::Designator Designator;
Designator ImportDesignator(const Designator &D);
/// \brief What we should import from the definition.
enum ImportDefinitionKind {
/// \brief Import the default subset of the definition, which might be
/// nothing (if minimal import is set) or might be everything (if minimal
/// import is not set).
IDK_Default,
/// \brief Import everything.
IDK_Everything,
/// \brief Import only the bare bones needed to establish a valid
/// DeclContext.
IDK_Basic
};
bool shouldForceImportDeclContext(ImportDefinitionKind IDK) {
return IDK == IDK_Everything ||
(IDK == IDK_Default && !Importer.isMinimalImport());
}
bool ImportDefinition(RecordDecl *From, RecordDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(VarDecl *From, VarDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(EnumDecl *From, EnumDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(ObjCInterfaceDecl *From, ObjCInterfaceDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(ObjCProtocolDecl *From, ObjCProtocolDecl *To,
ImportDefinitionKind Kind = IDK_Default);
TemplateParameterList *ImportTemplateParameterList(
TemplateParameterList *Params);
TemplateArgument ImportTemplateArgument(const TemplateArgument &From);
TemplateArgumentLoc ImportTemplateArgumentLoc(
const TemplateArgumentLoc &TALoc, bool &Error);
bool ImportTemplateArguments(const TemplateArgument *FromArgs,
unsigned NumFromArgs,
SmallVectorImpl<TemplateArgument> &ToArgs);
bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,
bool Complain = true);
bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
bool Complain = true);
bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);
bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC);
bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
Decl *VisitDecl(Decl *D);
Decl *VisitAccessSpecDecl(AccessSpecDecl *D);
Decl *VisitStaticAssertDecl(StaticAssertDecl *D);
Decl *VisitTranslationUnitDecl(TranslationUnitDecl *D);
Decl *VisitNamespaceDecl(NamespaceDecl *D);
Decl *VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias);
Decl *VisitTypedefDecl(TypedefDecl *D);
Decl *VisitTypeAliasDecl(TypeAliasDecl *D);
Decl *VisitLabelDecl(LabelDecl *D);
Decl *VisitEnumDecl(EnumDecl *D);
Decl *VisitRecordDecl(RecordDecl *D);
Decl *VisitEnumConstantDecl(EnumConstantDecl *D);
Decl *VisitFunctionDecl(FunctionDecl *D);
Decl *VisitCXXMethodDecl(CXXMethodDecl *D);
Decl *VisitCXXConstructorDecl(CXXConstructorDecl *D);
Decl *VisitCXXDestructorDecl(CXXDestructorDecl *D);
Decl *VisitCXXConversionDecl(CXXConversionDecl *D);
Decl *VisitFieldDecl(FieldDecl *D);
Decl *VisitIndirectFieldDecl(IndirectFieldDecl *D);
Decl *VisitFriendDecl(FriendDecl *D);
Decl *VisitObjCIvarDecl(ObjCIvarDecl *D);
Decl *VisitVarDecl(VarDecl *D);
Decl *VisitImplicitParamDecl(ImplicitParamDecl *D);
Decl *VisitParmVarDecl(ParmVarDecl *D);
Decl *VisitObjCMethodDecl(ObjCMethodDecl *D);
Decl *VisitObjCTypeParamDecl(ObjCTypeParamDecl *D);
Decl *VisitObjCCategoryDecl(ObjCCategoryDecl *D);
Decl *VisitObjCProtocolDecl(ObjCProtocolDecl *D);
Decl *VisitLinkageSpecDecl(LinkageSpecDecl *D);
ObjCTypeParamList *ImportObjCTypeParamList(ObjCTypeParamList *list);
Decl *VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
Decl *VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D);
Decl *VisitObjCImplementationDecl(ObjCImplementationDecl *D);
Decl *VisitObjCPropertyDecl(ObjCPropertyDecl *D);
Decl *VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
Decl *VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D);
Decl *VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D);
Decl *VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D);
Decl *VisitClassTemplateDecl(ClassTemplateDecl *D);
Decl *VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D);
Decl *VisitVarTemplateDecl(VarTemplateDecl *D);
Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D);
// Importing statements
DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
Stmt *VisitStmt(Stmt *S);
Stmt *VisitGCCAsmStmt(GCCAsmStmt *S);
Stmt *VisitDeclStmt(DeclStmt *S);
Stmt *VisitNullStmt(NullStmt *S);
Stmt *VisitCompoundStmt(CompoundStmt *S);
Stmt *VisitCaseStmt(CaseStmt *S);
Stmt *VisitDefaultStmt(DefaultStmt *S);
Stmt *VisitLabelStmt(LabelStmt *S);
Stmt *VisitAttributedStmt(AttributedStmt *S);
Stmt *VisitIfStmt(IfStmt *S);
Stmt *VisitSwitchStmt(SwitchStmt *S);
Stmt *VisitWhileStmt(WhileStmt *S);
Stmt *VisitDoStmt(DoStmt *S);
Stmt *VisitForStmt(ForStmt *S);
Stmt *VisitGotoStmt(GotoStmt *S);
Stmt *VisitIndirectGotoStmt(IndirectGotoStmt *S);
Stmt *VisitContinueStmt(ContinueStmt *S);
Stmt *VisitBreakStmt(BreakStmt *S);
Stmt *VisitReturnStmt(ReturnStmt *S);
// FIXME: MSAsmStmt
// FIXME: SEHExceptStmt
// FIXME: SEHFinallyStmt
// FIXME: SEHTryStmt
// FIXME: SEHLeaveStmt
// FIXME: CapturedStmt
Stmt *VisitCXXCatchStmt(CXXCatchStmt *S);
Stmt *VisitCXXTryStmt(CXXTryStmt *S);
Stmt *VisitCXXForRangeStmt(CXXForRangeStmt *S);
// FIXME: MSDependentExistsStmt
Stmt *VisitObjCForCollectionStmt(ObjCForCollectionStmt *S);
Stmt *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S);
Stmt *VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S);
Stmt *VisitObjCAtTryStmt(ObjCAtTryStmt *S);
Stmt *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S);
Stmt *VisitObjCAtThrowStmt(ObjCAtThrowStmt *S);
Stmt *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S);
// Importing expressions
Expr *VisitExpr(Expr *E);
Expr *VisitVAArgExpr(VAArgExpr *E);
Expr *VisitGNUNullExpr(GNUNullExpr *E);
Expr *VisitPredefinedExpr(PredefinedExpr *E);
Expr *VisitDeclRefExpr(DeclRefExpr *E);
Expr *VisitImplicitValueInitExpr(ImplicitValueInitExpr *ILE);
Expr *VisitDesignatedInitExpr(DesignatedInitExpr *E);
Expr *VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E);
Expr *VisitIntegerLiteral(IntegerLiteral *E);
Expr *VisitFloatingLiteral(FloatingLiteral *E);
Expr *VisitCharacterLiteral(CharacterLiteral *E);
Expr *VisitStringLiteral(StringLiteral *E);
Expr *VisitCompoundLiteralExpr(CompoundLiteralExpr *E);
Expr *VisitAtomicExpr(AtomicExpr *E);
Expr *VisitAddrLabelExpr(AddrLabelExpr *E);
Expr *VisitParenExpr(ParenExpr *E);
Expr *VisitParenListExpr(ParenListExpr *E);
Expr *VisitStmtExpr(StmtExpr *E);
Expr *VisitUnaryOperator(UnaryOperator *E);
Expr *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E);
Expr *VisitBinaryOperator(BinaryOperator *E);
Expr *VisitConditionalOperator(ConditionalOperator *E);
Expr *VisitBinaryConditionalOperator(BinaryConditionalOperator *E);
Expr *VisitOpaqueValueExpr(OpaqueValueExpr *E);
Expr *VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E);
Expr *VisitExpressionTraitExpr(ExpressionTraitExpr *E);
Expr *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
Expr *VisitCompoundAssignOperator(CompoundAssignOperator *E);
Expr *VisitImplicitCastExpr(ImplicitCastExpr *E);
Expr *VisitExplicitCastExpr(ExplicitCastExpr *E);
Expr *VisitOffsetOfExpr(OffsetOfExpr *OE);
Expr *VisitCXXThrowExpr(CXXThrowExpr *E);
Expr *VisitCXXNoexceptExpr(CXXNoexceptExpr *E);
Expr *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E);
Expr *VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E);
Expr *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E);
Expr *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *CE);
Expr *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E);
Expr *VisitCXXNewExpr(CXXNewExpr *CE);
Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E);
Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E);
Expr *VisitExprWithCleanups(ExprWithCleanups *EWC);
Expr *VisitCXXThisExpr(CXXThisExpr *E);
Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E);
Expr *VisitMemberExpr(MemberExpr *E);
Expr *VisitCallExpr(CallExpr *E);
Expr *VisitInitListExpr(InitListExpr *E);
Expr *VisitArrayInitLoopExpr(ArrayInitLoopExpr *E);
Expr *VisitArrayInitIndexExpr(ArrayInitIndexExpr *E);
Expr *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E);
Expr *VisitCXXNamedCastExpr(CXXNamedCastExpr *E);
Expr *VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E);
template<typename IIter, typename OIter>
void ImportArray(IIter Ibegin, IIter Iend, OIter Obegin) {
typedef typename std::remove_reference<decltype(*Obegin)>::type ItemT;
ASTImporter &ImporterRef = Importer;
std::transform(Ibegin, Iend, Obegin,
[&ImporterRef](ItemT From) -> ItemT {
return ImporterRef.Import(From);
});
}
template<typename IIter, typename OIter>
bool ImportArrayChecked(IIter Ibegin, IIter Iend, OIter Obegin) {
typedef typename std::remove_reference<decltype(**Obegin)>::type ItemT;
ASTImporter &ImporterRef = Importer;
bool Failed = false;
std::transform(Ibegin, Iend, Obegin,
[&ImporterRef, &Failed](ItemT *From) -> ItemT * {
ItemT *To = cast_or_null<ItemT>(
ImporterRef.Import(From));
if (!To && From)
Failed = true;
return To;
});
return Failed;
}
template<typename InContainerTy, typename OutContainerTy>
bool ImportContainerChecked(const InContainerTy &InContainer,
OutContainerTy &OutContainer) {
return ImportArrayChecked(InContainer.begin(), InContainer.end(),
OutContainer.begin());
}
template<typename InContainerTy, typename OIter>
bool ImportArrayChecked(const InContainerTy &InContainer, OIter Obegin) {
return ImportArrayChecked(InContainer.begin(), InContainer.end(), Obegin);
}
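// Illustrative usage sketch (hypothetical caller) of ImportArrayChecked: the
// Visit*Decl methods use it to bulk-import lists such as function parameters,
// bailing out if any single element fails to import.
//
//   SmallVector<ParmVarDecl *, 8> ToParams(FromFunction->getNumParams());
//   if (ImportArrayChecked(FromFunction->parameters(), ToParams.begin()))
//     return nullptr;  // some parameter could not be imported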
// Importing overrides.
void ImportOverrides(CXXMethodDecl *ToMethod, CXXMethodDecl *FromMethod);
};
} // end namespace clang
//----------------------------------------------------------------------------
// Import Types
//----------------------------------------------------------------------------
using namespace clang;
QualType ASTNodeImporter::VisitType(const Type *T) {
Importer.FromDiag(SourceLocation(), diag::err_unsupported_ast_node)
<< T->getTypeClassName();
return QualType();
}
QualType ASTNodeImporter::VisitAtomicType(const AtomicType *T) {
QualType UnderlyingType = Importer.Import(T->getValueType());
if (UnderlyingType.isNull())
return QualType();
return Importer.getToContext().getAtomicType(UnderlyingType);
}
QualType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) {
switch (T->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return Importer.getToContext().SingletonId;
#include "clang/Basic/OpenCLImageTypes.def"
#define SHARED_SINGLETON_TYPE(Expansion)
#define BUILTIN_TYPE(Id, SingletonId) \
case BuiltinType::Id: return Importer.getToContext().SingletonId;
#include "clang/AST/BuiltinTypes.def"
// FIXME: for Char16, Char32, and NullPtr, make sure that the "to"
// context supports C++.
// FIXME: for ObjCId, ObjCClass, and ObjCSel, make sure that the "to"
// context supports ObjC.
case BuiltinType::Char_U:
// The context we're importing from has an unsigned 'char'. If we're
// importing into a context with a signed 'char', translate to
// 'unsigned char' instead.
if (Importer.getToContext().getLangOpts().CharIsSigned)
return Importer.getToContext().UnsignedCharTy;
return Importer.getToContext().CharTy;
case BuiltinType::Char_S:
// The context we're importing from has a signed 'char'. If we're
// importing into a context with an unsigned 'char', translate to
// 'signed char' instead.
if (!Importer.getToContext().getLangOpts().CharIsSigned)
return Importer.getToContext().SignedCharTy;
return Importer.getToContext().CharTy;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
// FIXME: If not in C++, shall we translate to the C equivalent of
// wchar_t?
return Importer.getToContext().WCharTy;
}
llvm_unreachable("Invalid BuiltinType Kind!");
}
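// Illustrative sketch (hypothetical helper, not part of ASTImporter) of the
// plain-'char' translation in the two cases above: when the two contexts
// disagree about the signedness of plain 'char', switch to an explicitly
// signed or unsigned char type so the original signedness is preserved.
//
//   QualType importPlainChar(ASTContext &To, bool FromCharIsSigned) {
//     bool ToCharIsSigned = To.getLangOpts().CharIsSigned;
//     if (FromCharIsSigned == ToCharIsSigned)
//       return To.CharTy;  // same signedness: keep plain 'char'
//     return FromCharIsSigned ? To.SignedCharTy : To.UnsignedCharTy;
//   }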
QualType ASTNodeImporter::VisitDecayedType(const DecayedType *T) {
QualType OrigT = Importer.Import(T->getOriginalType());
if (OrigT.isNull())
return QualType();
return Importer.getToContext().getDecayedType(OrigT);
}
QualType ASTNodeImporter::VisitComplexType(const ComplexType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getComplexType(ToElementType);
}
QualType ASTNodeImporter::VisitPointerType(const PointerType *T) {
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getPointerType(ToPointeeType);
}
QualType ASTNodeImporter::VisitBlockPointerType(const BlockPointerType *T) {
// FIXME: Check for blocks support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getBlockPointerType(ToPointeeType);
}
QualType
ASTNodeImporter::VisitLValueReferenceType(const LValueReferenceType *T) {
// FIXME: Check for C++ support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeTypeAsWritten());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getLValueReferenceType(ToPointeeType);
}
QualType
ASTNodeImporter::VisitRValueReferenceType(const RValueReferenceType *T) {
// FIXME: Check for C++0x support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeTypeAsWritten());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getRValueReferenceType(ToPointeeType);
}
QualType ASTNodeImporter::VisitMemberPointerType(const MemberPointerType *T) {
// FIXME: Check for C++ support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
QualType ClassType = Importer.Import(QualType(T->getClass(), 0));
return Importer.getToContext().getMemberPointerType(ToPointeeType,
ClassType.getTypePtr());
}
QualType ASTNodeImporter::VisitConstantArrayType(const ConstantArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getConstantArrayType(ToElementType,
T->getSize(),
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers());
}
QualType
ASTNodeImporter::VisitIncompleteArrayType(const IncompleteArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getIncompleteArrayType(ToElementType,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers());
}
QualType ASTNodeImporter::VisitVariableArrayType(const VariableArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
Expr *Size = Importer.Import(T->getSizeExpr());
if (!Size)
return QualType();
SourceRange Brackets = Importer.Import(T->getBracketsRange());
return Importer.getToContext().getVariableArrayType(ToElementType, Size,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers(),
Brackets);
}
QualType ASTNodeImporter::VisitVectorType(const VectorType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getVectorType(ToElementType,
T->getNumElements(),
T->getVectorKind());
}
QualType ASTNodeImporter::VisitExtVectorType(const ExtVectorType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getExtVectorType(ToElementType,
T->getNumElements());
}
QualType
ASTNodeImporter::VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
// FIXME: What happens if we're importing a function without a prototype
// into C++? Should we make it variadic?
QualType ToResultType = Importer.Import(T->getReturnType());
if (ToResultType.isNull())
return QualType();
return Importer.getToContext().getFunctionNoProtoType(ToResultType,
T->getExtInfo());
}
QualType ASTNodeImporter::VisitFunctionProtoType(const FunctionProtoType *T) {
QualType ToResultType = Importer.Import(T->getReturnType());
if (ToResultType.isNull())
return QualType();
// Import argument types
SmallVector<QualType, 4> ArgTypes;
for (const auto &A : T->param_types()) {
QualType ArgType = Importer.Import(A);
if (ArgType.isNull())
return QualType();
ArgTypes.push_back(ArgType);
}
// Import exception types
SmallVector<QualType, 4> ExceptionTypes;
for (const auto &E : T->exceptions()) {
QualType ExceptionType = Importer.Import(E);
if (ExceptionType.isNull())
return QualType();
ExceptionTypes.push_back(ExceptionType);
}
FunctionProtoType::ExtProtoInfo FromEPI = T->getExtProtoInfo();
FunctionProtoType::ExtProtoInfo ToEPI;
ToEPI.ExtInfo = FromEPI.ExtInfo;
ToEPI.Variadic = FromEPI.Variadic;
ToEPI.HasTrailingReturn = FromEPI.HasTrailingReturn;
ToEPI.TypeQuals = FromEPI.TypeQuals;
ToEPI.RefQualifier = FromEPI.RefQualifier;
ToEPI.ExceptionSpec.Type = FromEPI.ExceptionSpec.Type;
ToEPI.ExceptionSpec.Exceptions = ExceptionTypes;
ToEPI.ExceptionSpec.NoexceptExpr =
Importer.Import(FromEPI.ExceptionSpec.NoexceptExpr);
ToEPI.ExceptionSpec.SourceDecl = cast_or_null<FunctionDecl>(
Importer.Import(FromEPI.ExceptionSpec.SourceDecl));
ToEPI.ExceptionSpec.SourceTemplate = cast_or_null<FunctionDecl>(
Importer.Import(FromEPI.ExceptionSpec.SourceTemplate));
return Importer.getToContext().getFunctionType(ToResultType, ArgTypes, ToEPI);
}
QualType ASTNodeImporter::VisitParenType(const ParenType *T) {
QualType ToInnerType = Importer.Import(T->getInnerType());
if (ToInnerType.isNull())
return QualType();
return Importer.getToContext().getParenType(ToInnerType);
}
QualType ASTNodeImporter::VisitTypedefType(const TypedefType *T) {
TypedefNameDecl *ToDecl
= dyn_cast_or_null<TypedefNameDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTypeDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitTypeOfExprType(const TypeOfExprType *T) {
Expr *ToExpr = Importer.Import(T->getUnderlyingExpr());
if (!ToExpr)
return QualType();
return Importer.getToContext().getTypeOfExprType(ToExpr);
}
QualType ASTNodeImporter::VisitTypeOfType(const TypeOfType *T) {
QualType ToUnderlyingType = Importer.Import(T->getUnderlyingType());
if (ToUnderlyingType.isNull())
return QualType();
return Importer.getToContext().getTypeOfType(ToUnderlyingType);
}
QualType ASTNodeImporter::VisitDecltypeType(const DecltypeType *T) {
// FIXME: Make sure that the "to" context supports C++0x!
Expr *ToExpr = Importer.Import(T->getUnderlyingExpr());
if (!ToExpr)
return QualType();
QualType UnderlyingType = Importer.Import(T->getUnderlyingType());
if (UnderlyingType.isNull())
return QualType();
return Importer.getToContext().getDecltypeType(ToExpr, UnderlyingType);
}
QualType ASTNodeImporter::VisitUnaryTransformType(const UnaryTransformType *T) {
QualType ToBaseType = Importer.Import(T->getBaseType());
QualType ToUnderlyingType = Importer.Import(T->getUnderlyingType());
if (ToBaseType.isNull() || ToUnderlyingType.isNull())
return QualType();
return Importer.getToContext().getUnaryTransformType(ToBaseType,
ToUnderlyingType,
T->getUTTKind());
}
QualType ASTNodeImporter::VisitAutoType(const AutoType *T) {
// FIXME: Make sure that the "to" context supports C++11!
QualType FromDeduced = T->getDeducedType();
QualType ToDeduced;
if (!FromDeduced.isNull()) {
ToDeduced = Importer.Import(FromDeduced);
if (ToDeduced.isNull())
return QualType();
}
return Importer.getToContext().getAutoType(ToDeduced, T->getKeyword(),
/*IsDependent*/false);
}
QualType ASTNodeImporter::VisitInjectedClassNameType(
const InjectedClassNameType *T) {
CXXRecordDecl *D = cast_or_null<CXXRecordDecl>(Importer.Import(T->getDecl()));
if (!D)
return QualType();
QualType InjType = Importer.Import(T->getInjectedSpecializationType());
if (InjType.isNull())
return QualType();
// FIXME: ASTContext::getInjectedClassNameType is not suitable for AST reading
// See comments in InjectedClassNameType definition for details
// return Importer.getToContext().getInjectedClassNameType(D, InjType);
enum {
TypeAlignmentInBits = 4,
TypeAlignment = 1 << TypeAlignmentInBits
};
return QualType(new (Importer.getToContext(), TypeAlignment)
InjectedClassNameType(D, InjType), 0);
}
QualType ASTNodeImporter::VisitRecordType(const RecordType *T) {
RecordDecl *ToDecl
= dyn_cast_or_null<RecordDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTagDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitEnumType(const EnumType *T) {
EnumDecl *ToDecl
= dyn_cast_or_null<EnumDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTagDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitAttributedType(const AttributedType *T) {
QualType FromModifiedType = T->getModifiedType();
QualType FromEquivalentType = T->getEquivalentType();
QualType ToModifiedType;
QualType ToEquivalentType;
if (!FromModifiedType.isNull()) {
ToModifiedType = Importer.Import(FromModifiedType);
if (ToModifiedType.isNull())
return QualType();
}
if (!FromEquivalentType.isNull()) {
ToEquivalentType = Importer.Import(FromEquivalentType);
if (ToEquivalentType.isNull())
return QualType();
}
return Importer.getToContext().getAttributedType(T->getAttrKind(),
ToModifiedType, ToEquivalentType);
}
QualType ASTNodeImporter::VisitTemplateTypeParmType(
const TemplateTypeParmType *T) {
TemplateTypeParmDecl *ParmDecl =
cast_or_null<TemplateTypeParmDecl>(Importer.Import(T->getDecl()));
if (!ParmDecl && T->getDecl())
return QualType();
return Importer.getToContext().getTemplateTypeParmType(
T->getDepth(), T->getIndex(), T->isParameterPack(), ParmDecl);
}
QualType ASTNodeImporter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
const TemplateTypeParmType *Replaced =
cast_or_null<TemplateTypeParmType>(Importer.Import(
QualType(T->getReplacedParameter(), 0)).getTypePtr());
if (!Replaced)
return QualType();
QualType Replacement = Importer.Import(T->getReplacementType());
if (Replacement.isNull())
return QualType();
Replacement = Replacement.getCanonicalType();
return Importer.getToContext().getSubstTemplateTypeParmType(
Replaced, Replacement);
}
QualType ASTNodeImporter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
TemplateName ToTemplate = Importer.Import(T->getTemplateName());
if (ToTemplate.isNull())
return QualType();
SmallVector<TemplateArgument, 2> ToTemplateArgs;
if (ImportTemplateArguments(T->getArgs(), T->getNumArgs(), ToTemplateArgs))
return QualType();
QualType ToCanonType;
if (!QualType(T, 0).isCanonical()) {
QualType FromCanonType
= Importer.getFromContext().getCanonicalType(QualType(T, 0));
ToCanonType = Importer.Import(FromCanonType);
if (ToCanonType.isNull())
return QualType();
}
return Importer.getToContext().getTemplateSpecializationType(ToTemplate,
ToTemplateArgs,
ToCanonType);
}
QualType ASTNodeImporter::VisitElaboratedType(const ElaboratedType *T) {
NestedNameSpecifier *ToQualifier = nullptr;
// Note: the qualifier in an ElaboratedType is optional.
if (T->getQualifier()) {
ToQualifier = Importer.Import(T->getQualifier());
if (!ToQualifier)
return QualType();
}
QualType ToNamedType = Importer.Import(T->getNamedType());
if (ToNamedType.isNull())
return QualType();
return Importer.getToContext().getElaboratedType(T->getKeyword(),
ToQualifier, ToNamedType);
}
QualType ASTNodeImporter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
ObjCInterfaceDecl *Class
= dyn_cast_or_null<ObjCInterfaceDecl>(Importer.Import(T->getDecl()));
if (!Class)
return QualType();
return Importer.getToContext().getObjCInterfaceType(Class);
}
QualType ASTNodeImporter::VisitObjCObjectType(const ObjCObjectType *T) {
QualType ToBaseType = Importer.Import(T->getBaseType());
if (ToBaseType.isNull())
return QualType();
SmallVector<QualType, 4> TypeArgs;
for (auto TypeArg : T->getTypeArgsAsWritten()) {
QualType ImportedTypeArg = Importer.Import(TypeArg);
if (ImportedTypeArg.isNull())
return QualType();
TypeArgs.push_back(ImportedTypeArg);
}
SmallVector<ObjCProtocolDecl *, 4> Protocols;
for (auto *P : T->quals()) {
ObjCProtocolDecl *Protocol
= dyn_cast_or_null<ObjCProtocolDecl>(Importer.Import(P));
if (!Protocol)
return QualType();
Protocols.push_back(Protocol);
}
return Importer.getToContext().getObjCObjectType(ToBaseType, TypeArgs,
Protocols,
T->isKindOfTypeAsWritten());
}
QualType
ASTNodeImporter::VisitObjCObjectPointerType(const ObjCObjectPointerType *T) {
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getObjCObjectPointerType(ToPointeeType);
}
//----------------------------------------------------------------------------
// Import Declarations
//----------------------------------------------------------------------------
bool ASTNodeImporter::ImportDeclParts(NamedDecl *D, DeclContext *&DC,
DeclContext *&LexicalDC,
DeclarationName &Name,
NamedDecl *&ToD,
SourceLocation &Loc) {
// Import the context of this declaration.
DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return true;
LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return true;
}
// Import the name of this declaration.
Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return true;
// Import the location of this declaration.
Loc = Importer.Import(D->getLocation());
ToD = cast_or_null<NamedDecl>(Importer.GetAlreadyImportedOrNull(D));
return false;
}
void ASTNodeImporter::ImportDefinitionIfNeeded(Decl *FromD, Decl *ToD) {
if (!FromD)
return;
if (!ToD) {
ToD = Importer.Import(FromD);
if (!ToD)
return;
}
if (RecordDecl *FromRecord = dyn_cast<RecordDecl>(FromD)) {
if (RecordDecl *ToRecord = cast_or_null<RecordDecl>(ToD)) {
if (FromRecord->getDefinition() && FromRecord->isCompleteDefinition() && !ToRecord->getDefinition()) {
ImportDefinition(FromRecord, ToRecord);
}
}
return;
}
if (EnumDecl *FromEnum = dyn_cast<EnumDecl>(FromD)) {
if (EnumDecl *ToEnum = cast_or_null<EnumDecl>(ToD)) {
if (FromEnum->getDefinition() && !ToEnum->getDefinition()) {
ImportDefinition(FromEnum, ToEnum);
}
}
return;
}
}
void
ASTNodeImporter::ImportDeclarationNameLoc(const DeclarationNameInfo &From,
DeclarationNameInfo& To) {
// NOTE: To.Name and To.Loc are already imported.
// We only have to import To.LocInfo.
switch (To.getName().getNameKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
return;
case DeclarationName::CXXOperatorName: {
SourceRange Range = From.getCXXOperatorNameRange();
To.setCXXOperatorNameRange(Importer.Import(Range));
return;
}
case DeclarationName::CXXLiteralOperatorName: {
SourceLocation Loc = From.getCXXLiteralOperatorNameLoc();
To.setCXXLiteralOperatorNameLoc(Importer.Import(Loc));
return;
}
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName: {
TypeSourceInfo *FromTInfo = From.getNamedTypeInfo();
To.setNamedTypeInfo(Importer.Import(FromTInfo));
return;
}
}
llvm_unreachable("Unknown name kind.");
}
void ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) {
if (Importer.isMinimalImport() && !ForceImport) {
Importer.ImportContext(FromDC);
return;
}
for (auto *From : FromDC->decls())
Importer.Import(From);
}
bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition() || To->isBeingDefined()) {
if (Kind == IDK_Everything)
ImportDeclContext(From, /*ForceImport=*/true);
return false;
}
To->startDefinition();
// Add base classes.
if (CXXRecordDecl *ToCXX = dyn_cast<CXXRecordDecl>(To)) {
CXXRecordDecl *FromCXX = cast<CXXRecordDecl>(From);
struct CXXRecordDecl::DefinitionData &ToData = ToCXX->data();
struct CXXRecordDecl::DefinitionData &FromData = FromCXX->data();
ToData.UserDeclaredConstructor = FromData.UserDeclaredConstructor;
ToData.UserDeclaredSpecialMembers = FromData.UserDeclaredSpecialMembers;
ToData.Aggregate = FromData.Aggregate;
ToData.PlainOldData = FromData.PlainOldData;
ToData.Empty = FromData.Empty;
ToData.Polymorphic = FromData.Polymorphic;
ToData.Abstract = FromData.Abstract;
ToData.IsStandardLayout = FromData.IsStandardLayout;
ToData.HasNoNonEmptyBases = FromData.HasNoNonEmptyBases;
ToData.HasPrivateFields = FromData.HasPrivateFields;
ToData.HasProtectedFields = FromData.HasProtectedFields;
ToData.HasPublicFields = FromData.HasPublicFields;
ToData.HasMutableFields = FromData.HasMutableFields;
ToData.HasVariantMembers = FromData.HasVariantMembers;
ToData.HasOnlyCMembers = FromData.HasOnlyCMembers;
ToData.HasInClassInitializer = FromData.HasInClassInitializer;
ToData.HasUninitializedReferenceMember
= FromData.HasUninitializedReferenceMember;
ToData.HasUninitializedFields = FromData.HasUninitializedFields;
ToData.HasInheritedConstructor = FromData.HasInheritedConstructor;
ToData.HasInheritedAssignment = FromData.HasInheritedAssignment;
+ ToData.NeedOverloadResolutionForCopyConstructor
+ = FromData.NeedOverloadResolutionForCopyConstructor;
ToData.NeedOverloadResolutionForMoveConstructor
= FromData.NeedOverloadResolutionForMoveConstructor;
ToData.NeedOverloadResolutionForMoveAssignment
= FromData.NeedOverloadResolutionForMoveAssignment;
ToData.NeedOverloadResolutionForDestructor
= FromData.NeedOverloadResolutionForDestructor;
+ ToData.DefaultedCopyConstructorIsDeleted
+ = FromData.DefaultedCopyConstructorIsDeleted;
ToData.DefaultedMoveConstructorIsDeleted
= FromData.DefaultedMoveConstructorIsDeleted;
ToData.DefaultedMoveAssignmentIsDeleted
= FromData.DefaultedMoveAssignmentIsDeleted;
ToData.DefaultedDestructorIsDeleted = FromData.DefaultedDestructorIsDeleted;
ToData.HasTrivialSpecialMembers = FromData.HasTrivialSpecialMembers;
ToData.HasIrrelevantDestructor = FromData.HasIrrelevantDestructor;
ToData.HasConstexprNonCopyMoveConstructor
= FromData.HasConstexprNonCopyMoveConstructor;
ToData.HasDefaultedDefaultConstructor
= FromData.HasDefaultedDefaultConstructor;
+ ToData.CanPassInRegisters = FromData.CanPassInRegisters;
ToData.DefaultedDefaultConstructorIsConstexpr
= FromData.DefaultedDefaultConstructorIsConstexpr;
ToData.HasConstexprDefaultConstructor
= FromData.HasConstexprDefaultConstructor;
ToData.HasNonLiteralTypeFieldsOrBases
= FromData.HasNonLiteralTypeFieldsOrBases;
// ComputedVisibleConversions not imported.
ToData.UserProvidedDefaultConstructor
= FromData.UserProvidedDefaultConstructor;
ToData.DeclaredSpecialMembers = FromData.DeclaredSpecialMembers;
ToData.ImplicitCopyConstructorCanHaveConstParamForVBase
= FromData.ImplicitCopyConstructorCanHaveConstParamForVBase;
ToData.ImplicitCopyConstructorCanHaveConstParamForNonVBase
= FromData.ImplicitCopyConstructorCanHaveConstParamForNonVBase;
ToData.ImplicitCopyAssignmentHasConstParam
= FromData.ImplicitCopyAssignmentHasConstParam;
ToData.HasDeclaredCopyConstructorWithConstParam
= FromData.HasDeclaredCopyConstructorWithConstParam;
ToData.HasDeclaredCopyAssignmentWithConstParam
= FromData.HasDeclaredCopyAssignmentWithConstParam;
ToData.IsLambda = FromData.IsLambda;
SmallVector<CXXBaseSpecifier *, 4> Bases;
for (const auto &Base1 : FromCXX->bases()) {
QualType T = Importer.Import(Base1.getType());
if (T.isNull())
return true;
SourceLocation EllipsisLoc;
if (Base1.isPackExpansion())
EllipsisLoc = Importer.Import(Base1.getEllipsisLoc());
// Ensure that we have a definition for the base.
ImportDefinitionIfNeeded(Base1.getType()->getAsCXXRecordDecl());
Bases.push_back(
new (Importer.getToContext())
CXXBaseSpecifier(Importer.Import(Base1.getSourceRange()),
Base1.isVirtual(),
Base1.isBaseOfClass(),
Base1.getAccessSpecifierAsWritten(),
Importer.Import(Base1.getTypeSourceInfo()),
EllipsisLoc));
}
if (!Bases.empty())
ToCXX->setBases(Bases.data(), Bases.size());
}
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From, /*ForceImport=*/true);
To->completeDefinition();
return false;
}
bool ASTNodeImporter::ImportDefinition(VarDecl *From, VarDecl *To,
ImportDefinitionKind Kind) {
if (To->getAnyInitializer())
return false;
// FIXME: Can we really import any initializer? Alternatively, we could force
// ourselves to import every declaration of a variable and then only use
// getInit() here.
To->setInit(Importer.Import(const_cast<Expr *>(From->getAnyInitializer())));
// FIXME: Other bits to merge?
return false;
}
bool ASTNodeImporter::ImportDefinition(EnumDecl *From, EnumDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition() || To->isBeingDefined()) {
if (Kind == IDK_Everything)
ImportDeclContext(From, /*ForceImport=*/true);
return false;
}
To->startDefinition();
QualType T = Importer.Import(Importer.getFromContext().getTypeDeclType(From));
if (T.isNull())
return true;
QualType ToPromotionType = Importer.Import(From->getPromotionType());
if (ToPromotionType.isNull())
return true;
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From, /*ForceImport=*/true);
// FIXME: we might need to merge the number of positive or negative bits
// if the enumerator lists don't match.
To->completeDefinition(T, ToPromotionType,
From->getNumPositiveBits(),
From->getNumNegativeBits());
return false;
}
TemplateParameterList *ASTNodeImporter::ImportTemplateParameterList(
TemplateParameterList *Params) {
SmallVector<NamedDecl *, 4> ToParams(Params->size());
if (ImportContainerChecked(*Params, ToParams))
return nullptr;
Expr *ToRequiresClause;
if (Expr *const R = Params->getRequiresClause()) {
ToRequiresClause = Importer.Import(R);
if (!ToRequiresClause)
return nullptr;
} else {
ToRequiresClause = nullptr;
}
return TemplateParameterList::Create(Importer.getToContext(),
Importer.Import(Params->getTemplateLoc()),
Importer.Import(Params->getLAngleLoc()),
ToParams,
Importer.Import(Params->getRAngleLoc()),
ToRequiresClause);
}
TemplateArgument
ASTNodeImporter::ImportTemplateArgument(const TemplateArgument &From) {
switch (From.getKind()) {
case TemplateArgument::Null:
return TemplateArgument();
case TemplateArgument::Type: {
QualType ToType = Importer.Import(From.getAsType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(ToType);
}
case TemplateArgument::Integral: {
QualType ToType = Importer.Import(From.getIntegralType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(From, ToType);
}
case TemplateArgument::Declaration: {
ValueDecl *To = cast_or_null<ValueDecl>(Importer.Import(From.getAsDecl()));
QualType ToType = Importer.Import(From.getParamTypeForDecl());
if (!To || ToType.isNull())
return TemplateArgument();
return TemplateArgument(To, ToType);
}
case TemplateArgument::NullPtr: {
QualType ToType = Importer.Import(From.getNullPtrType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(ToType, /*isNullPtr*/true);
}
case TemplateArgument::Template: {
TemplateName ToTemplate = Importer.Import(From.getAsTemplate());
if (ToTemplate.isNull())
return TemplateArgument();
return TemplateArgument(ToTemplate);
}
case TemplateArgument::TemplateExpansion: {
TemplateName ToTemplate
= Importer.Import(From.getAsTemplateOrTemplatePattern());
if (ToTemplate.isNull())
return TemplateArgument();
return TemplateArgument(ToTemplate, From.getNumTemplateExpansions());
}
case TemplateArgument::Expression:
if (Expr *ToExpr = Importer.Import(From.getAsExpr()))
return TemplateArgument(ToExpr);
return TemplateArgument();
case TemplateArgument::Pack: {
SmallVector<TemplateArgument, 2> ToPack;
ToPack.reserve(From.pack_size());
if (ImportTemplateArguments(From.pack_begin(), From.pack_size(), ToPack))
return TemplateArgument();
return TemplateArgument(
llvm::makeArrayRef(ToPack).copy(Importer.getToContext()));
}
}
llvm_unreachable("Invalid template argument kind");
}
TemplateArgumentLoc ASTNodeImporter::ImportTemplateArgumentLoc(
const TemplateArgumentLoc &TALoc, bool &Error) {
Error = false;
TemplateArgument Arg = ImportTemplateArgument(TALoc.getArgument());
TemplateArgumentLocInfo FromInfo = TALoc.getLocInfo();
TemplateArgumentLocInfo ToInfo;
if (Arg.getKind() == TemplateArgument::Expression) {
Expr *E = Importer.Import(FromInfo.getAsExpr());
ToInfo = TemplateArgumentLocInfo(E);
if (!E)
Error = true;
} else if (Arg.getKind() == TemplateArgument::Type) {
if (TypeSourceInfo *TSI = Importer.Import(FromInfo.getAsTypeSourceInfo()))
ToInfo = TemplateArgumentLocInfo(TSI);
else
Error = true;
} else {
ToInfo = TemplateArgumentLocInfo(
Importer.Import(FromInfo.getTemplateQualifierLoc()),
Importer.Import(FromInfo.getTemplateNameLoc()),
Importer.Import(FromInfo.getTemplateEllipsisLoc()));
}
return TemplateArgumentLoc(Arg, ToInfo);
}
bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs,
unsigned NumFromArgs,
SmallVectorImpl<TemplateArgument> &ToArgs) {
for (unsigned I = 0; I != NumFromArgs; ++I) {
TemplateArgument To = ImportTemplateArgument(FromArgs[I]);
if (To.isNull() && !FromArgs[I].isNull())
return true;
ToArgs.push_back(To);
}
return false;
}
bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord,
RecordDecl *ToRecord, bool Complain) {
// Eliminate a potential failure point where we attempt to re-import
// something we're trying to import while completing ToRecord.
Decl *ToOrigin = Importer.GetOriginalDecl(ToRecord);
if (ToOrigin) {
RecordDecl *ToOriginRecord = dyn_cast<RecordDecl>(ToOrigin);
if (ToOriginRecord)
ToRecord = ToOriginRecord;
}
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
ToRecord->getASTContext(),
Importer.getNonEquivalentDecls(),
false, Complain);
return Ctx.IsStructurallyEquivalent(FromRecord, ToRecord);
}
bool ASTNodeImporter::IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
bool Complain) {
StructuralEquivalenceContext Ctx(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), false, Complain);
return Ctx.IsStructurallyEquivalent(FromVar, ToVar);
}
bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum);
}
bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,
EnumConstantDecl *ToEC) {
const llvm::APSInt &FromVal = FromEC->getInitVal();
const llvm::APSInt &ToVal = ToEC->getInitVal();
return FromVal.isSigned() == ToVal.isSigned() &&
FromVal.getBitWidth() == ToVal.getBitWidth() &&
FromVal == ToVal;
}
bool ASTNodeImporter::IsStructuralMatch(ClassTemplateDecl *From,
ClassTemplateDecl *To) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(From, To);
}
bool ASTNodeImporter::IsStructuralMatch(VarTemplateDecl *From,
VarTemplateDecl *To) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(From, To);
}
Decl *ASTNodeImporter::VisitDecl(Decl *D) {
Importer.FromDiag(D->getLocation(), diag::err_unsupported_ast_node)
<< D->getDeclKindName();
return nullptr;
}
Decl *ASTNodeImporter::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
TranslationUnitDecl *ToD =
Importer.getToContext().getTranslationUnitDecl();
Importer.Imported(D, ToD);
return ToD;
}
Decl *ASTNodeImporter::VisitAccessSpecDecl(AccessSpecDecl *D) {
SourceLocation Loc = Importer.Import(D->getLocation());
SourceLocation ColonLoc = Importer.Import(D->getColonLoc());
// Import the context of this declaration.
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
AccessSpecDecl *accessSpecDecl
= AccessSpecDecl::Create(Importer.getToContext(), D->getAccess(),
DC, Loc, ColonLoc);
if (!accessSpecDecl)
return nullptr;
// Lexical DeclContext and Semantic DeclContext
// is always the same for the accessSpec.
accessSpecDecl->setLexicalDeclContext(DC);
DC->addDeclInternal(accessSpecDecl);
return accessSpecDecl;
}
Decl *ASTNodeImporter::VisitStaticAssertDecl(StaticAssertDecl *D) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
Expr *AssertExpr = Importer.Import(D->getAssertExpr());
if (!AssertExpr)
return nullptr;
StringLiteral *FromMsg = D->getMessage();
StringLiteral *ToMsg = cast_or_null<StringLiteral>(Importer.Import(FromMsg));
if (!ToMsg && FromMsg)
return nullptr;
StaticAssertDecl *ToD = StaticAssertDecl::Create(
Importer.getToContext(), DC, Loc, AssertExpr, ToMsg,
Importer.Import(D->getRParenLoc()), D->isFailed());
ToD->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToD);
Importer.Imported(D, ToD);
return ToD;
}
Decl *ASTNodeImporter::VisitNamespaceDecl(NamespaceDecl *D) {
// Import the major distinguishing characteristics of this namespace.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
NamespaceDecl *MergeWithNamespace = nullptr;
if (!Name) {
// This is an anonymous namespace. Adopt an existing anonymous
// namespace if we can.
// FIXME: Not testable.
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(DC))
MergeWithNamespace = TU->getAnonymousNamespace();
else
MergeWithNamespace = cast<NamespaceDecl>(DC)->getAnonymousNamespace();
} else {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Namespace))
continue;
if (NamespaceDecl *FoundNS = dyn_cast<NamespaceDecl>(FoundDecls[I])) {
MergeWithNamespace = FoundNS;
ConflictingDecls.clear();
break;
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Namespace,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the "to" namespace, if needed.
NamespaceDecl *ToNamespace = MergeWithNamespace;
if (!ToNamespace) {
ToNamespace = NamespaceDecl::Create(Importer.getToContext(), DC,
D->isInline(),
Importer.Import(D->getLocStart()),
Loc, Name.getAsIdentifierInfo(),
/*PrevDecl=*/nullptr);
ToNamespace->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToNamespace);
// If this is an anonymous namespace, register it as the anonymous
// namespace within its context.
if (!Name) {
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(DC))
TU->setAnonymousNamespace(ToNamespace);
else
cast<NamespaceDecl>(DC)->setAnonymousNamespace(ToNamespace);
}
}
Importer.Imported(D, ToNamespace);
ImportDeclContext(D);
return ToNamespace;
}
Decl *ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) {
// Import the major distinguishing characteristics of this typedef.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// If this typedef is not in block scope, determine whether we've
// seen a typedef with the same name (that we can merge with) or any
// other entity by that name (which name lookup could conflict with).
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (TypedefNameDecl *FoundTypedef =
dyn_cast<TypedefNameDecl>(FoundDecls[I])) {
if (Importer.IsStructurallyEquivalent(D->getUnderlyingType(),
FoundTypedef->getUnderlyingType()))
return Importer.Imported(D, FoundTypedef);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
// Import the underlying type of this typedef.
QualType T = Importer.Import(D->getUnderlyingType());
if (T.isNull())
return nullptr;
// Create the new typedef node.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
SourceLocation StartL = Importer.Import(D->getLocStart());
TypedefNameDecl *ToTypedef;
if (IsAlias)
ToTypedef = TypeAliasDecl::Create(Importer.getToContext(), DC,
StartL, Loc,
Name.getAsIdentifierInfo(),
TInfo);
else
ToTypedef = TypedefDecl::Create(Importer.getToContext(), DC,
StartL, Loc,
Name.getAsIdentifierInfo(),
TInfo);
ToTypedef->setAccess(D->getAccess());
ToTypedef->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToTypedef);
LexicalDC->addDeclInternal(ToTypedef);
return ToTypedef;
}
Decl *ASTNodeImporter::VisitTypedefDecl(TypedefDecl *D) {
return VisitTypedefNameDecl(D, /*IsAlias=*/false);
}
Decl *ASTNodeImporter::VisitTypeAliasDecl(TypeAliasDecl *D) {
return VisitTypedefNameDecl(D, /*IsAlias=*/true);
}
Decl *ASTNodeImporter::VisitLabelDecl(LabelDecl *D) {
// Import the major distinguishing characteristics of this label.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
assert(LexicalDC->isFunctionOrMethod());
LabelDecl *ToLabel = D->isGnuLocal()
? LabelDecl::Create(Importer.getToContext(),
DC, Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo(),
Importer.Import(D->getLocStart()))
: LabelDecl::Create(Importer.getToContext(),
DC, Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo());
Importer.Imported(D, ToLabel);
LabelStmt *Label = cast_or_null<LabelStmt>(Importer.Import(D->getStmt()));
if (!Label)
return nullptr;
ToLabel->setStmt(Label);
ToLabel->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToLabel);
return ToLabel;
}
Decl *ASTNodeImporter::VisitEnumDecl(EnumDecl *D) {
// Import the major distinguishing characteristics of this enum.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Figure out what enum name we're looking for.
unsigned IDNS = Decl::IDNS_Tag;
DeclarationName SearchName = Name;
if (!SearchName && D->getTypedefNameForAnonDecl()) {
SearchName = Importer.Import(D->getTypedefNameForAnonDecl()->getDeclName());
IDNS = Decl::IDNS_Ordinary;
} else if (Importer.getToContext().getLangOpts().CPlusPlus)
IDNS |= Decl::IDNS_Ordinary;
// We may already have an enum of the same name; try to find and match it.
if (!DC->isFunctionOrMethod() && SearchName) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(SearchName, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
Decl *Found = FoundDecls[I];
if (TypedefNameDecl *Typedef = dyn_cast<TypedefNameDecl>(Found)) {
if (const TagType *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
Found = Tag->getDecl();
}
if (EnumDecl *FoundEnum = dyn_cast<EnumDecl>(Found)) {
if (IsStructuralMatch(D, FoundEnum))
return Importer.Imported(D, FoundEnum);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the enum declaration.
EnumDecl *D2 = EnumDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocStart()),
Loc, Name.getAsIdentifierInfo(), nullptr,
D->isScoped(), D->isScopedUsingClassTag(),
D->isFixed());
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, D2);
LexicalDC->addDeclInternal(D2);
// Import the integer type.
QualType ToIntegerType = Importer.Import(D->getIntegerType());
if (ToIntegerType.isNull())
return nullptr;
D2->setIntegerType(ToIntegerType);
// Import the definition
if (D->isCompleteDefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
TagDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this record.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Figure out what structure name we're looking for.
unsigned IDNS = Decl::IDNS_Tag;
DeclarationName SearchName = Name;
if (!SearchName && D->getTypedefNameForAnonDecl()) {
SearchName = Importer.Import(D->getTypedefNameForAnonDecl()->getDeclName());
IDNS = Decl::IDNS_Ordinary;
} else if (Importer.getToContext().getLangOpts().CPlusPlus)
IDNS |= Decl::IDNS_Ordinary;
// We may already have a record of the same name; try to find and match it.
RecordDecl *AdoptDecl = nullptr;
RecordDecl *PrevDecl = nullptr;
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(SearchName, FoundDecls);
if (!FoundDecls.empty()) {
// We're going to have to compare D against potentially conflicting Decls,
// so complete it.
if (D->hasExternalLexicalStorage() && !D->isCompleteDefinition())
D->getASTContext().getExternalSource()->CompleteType(D);
}
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
Decl *Found = FoundDecls[I];
if (TypedefNameDecl *Typedef = dyn_cast<TypedefNameDecl>(Found)) {
if (const TagType *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
Found = Tag->getDecl();
}
if (RecordDecl *FoundRecord = dyn_cast<RecordDecl>(Found)) {
if (D->isAnonymousStructOrUnion() &&
FoundRecord->isAnonymousStructOrUnion()) {
// If both anonymous structs/unions are in a record context, make sure
// they occur in the same location in the context records.
if (Optional<unsigned> Index1 =
StructuralEquivalenceContext::findUntaggedStructOrUnionIndex(
D)) {
if (Optional<unsigned> Index2 = StructuralEquivalenceContext::
findUntaggedStructOrUnionIndex(FoundRecord)) {
if (*Index1 != *Index2)
continue;
}
}
}
PrevDecl = FoundRecord;
if (RecordDecl *FoundDef = FoundRecord->getDefinition()) {
if ((SearchName && !D->isCompleteDefinition())
|| (D->isCompleteDefinition() &&
D->isAnonymousStructOrUnion()
== FoundDef->isAnonymousStructOrUnion() &&
IsStructuralMatch(D, FoundDef))) {
// The record types structurally match, or the "from" translation
// unit only had a forward declaration anyway; treat them as the
// same type.
// FIXME: For C++, we should also merge methods here.
return Importer.Imported(D, FoundDef);
}
} else if (!D->isCompleteDefinition()) {
// We have a forward declaration of this type, so adopt that forward
// declaration rather than building a new one.
// If one or both can be completed from external storage then try one
// last time to complete and compare them before doing this.
if (FoundRecord->hasExternalLexicalStorage() &&
!FoundRecord->isCompleteDefinition())
FoundRecord->getASTContext().getExternalSource()->CompleteType(FoundRecord);
if (D->hasExternalLexicalStorage())
D->getASTContext().getExternalSource()->CompleteType(D);
if (FoundRecord->isCompleteDefinition() &&
D->isCompleteDefinition() &&
!IsStructuralMatch(D, FoundRecord))
continue;
AdoptDecl = FoundRecord;
continue;
} else if (!SearchName) {
continue;
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty() && SearchName) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the record declaration.
RecordDecl *D2 = AdoptDecl;
SourceLocation StartLoc = Importer.Import(D->getLocStart());
if (!D2) {
CXXRecordDecl *D2CXX = nullptr;
if (CXXRecordDecl *DCXX = llvm::dyn_cast<CXXRecordDecl>(D)) {
if (DCXX->isLambda()) {
TypeSourceInfo *TInfo = Importer.Import(DCXX->getLambdaTypeInfo());
D2CXX = CXXRecordDecl::CreateLambda(Importer.getToContext(),
DC, TInfo, Loc,
DCXX->isDependentLambda(),
DCXX->isGenericLambda(),
DCXX->getLambdaCaptureDefault());
Decl *CDecl = Importer.Import(DCXX->getLambdaContextDecl());
if (DCXX->getLambdaContextDecl() && !CDecl)
return nullptr;
D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), CDecl);
} else if (DCXX->isInjectedClassName()) {
// We have to be careful to do a similar dance to the one in
// Sema::ActOnStartCXXMemberDeclarations
CXXRecordDecl *const PrevDecl = nullptr;
const bool DelayTypeCreation = true;
D2CXX = CXXRecordDecl::Create(
Importer.getToContext(), D->getTagKind(), DC, StartLoc, Loc,
Name.getAsIdentifierInfo(), PrevDecl, DelayTypeCreation);
Importer.getToContext().getTypeDeclType(
D2CXX, llvm::dyn_cast<CXXRecordDecl>(DC));
} else {
D2CXX = CXXRecordDecl::Create(Importer.getToContext(),
D->getTagKind(),
DC, StartLoc, Loc,
Name.getAsIdentifierInfo());
}
D2 = D2CXX;
D2->setAccess(D->getAccess());
} else {
D2 = RecordDecl::Create(Importer.getToContext(), D->getTagKind(),
DC, StartLoc, Loc, Name.getAsIdentifierInfo());
}
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
if (D->isAnonymousStructOrUnion())
D2->setAnonymousStructOrUnion(true);
if (PrevDecl) {
// FIXME: do this for all Redeclarables, not just RecordDecls.
D2->setPreviousDecl(PrevDecl);
}
}
Importer.Imported(D, D2);
if (D->isCompleteDefinition() && ImportDefinition(D, D2, IDK_Default))
return nullptr;
return D2;
}
Decl *ASTNodeImporter::VisitEnumConstantDecl(EnumConstantDecl *D) {
// Import the major distinguishing characteristics of this enumerator.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Determine whether there are any other declarations with the same name and
// in the same context.
if (!LexicalDC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (EnumConstantDecl *FoundEnumConstant
= dyn_cast<EnumConstantDecl>(FoundDecls[I])) {
if (IsStructuralMatch(D, FoundEnumConstant))
return Importer.Imported(D, FoundEnumConstant);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
Expr *Init = Importer.Import(D->getInitExpr());
if (D->getInitExpr() && !Init)
return nullptr;
EnumConstantDecl *ToEnumerator
= EnumConstantDecl::Create(Importer.getToContext(), cast<EnumDecl>(DC), Loc,
Name.getAsIdentifierInfo(), T,
Init, D->getInitVal());
ToEnumerator->setAccess(D->getAccess());
ToEnumerator->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToEnumerator);
LexicalDC->addDeclInternal(ToEnumerator);
return ToEnumerator;
}
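// Function import: an existing function with the same name, external formal
// linkage, and structurally equivalent type is reused. In C++ a mismatched
// type is treated as an overload; in C it is diagnosed as an ODR violation.
// Exception specifications that refer back to the function itself are
// stripped before the type is imported and restored afterwards to break the
// recursion.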
Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
// Import the major distinguishing characteristics of this function.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Try to find a function in our own ("to") context with the same name, same
// type, and in the same context as the function we're importing.
if (!LexicalDC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (FunctionDecl *FoundFunction = dyn_cast<FunctionDecl>(FoundDecls[I])) {
if (FoundFunction->hasExternalFormalLinkage() &&
D->hasExternalFormalLinkage()) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundFunction->getType())) {
// FIXME: Actually try to merge the body and other attributes.
return Importer.Imported(D, FoundFunction);
}
// FIXME: Check for overloading more carefully, e.g., by boosting
// Sema::IsOverload out to the AST library.
// Function overloading is okay in C++.
if (Importer.getToContext().getLangOpts().CPlusPlus)
continue;
// Complain about inconsistent function types.
Importer.ToDiag(Loc, diag::err_odr_function_type_inconsistent)
<< Name << D->getType() << FoundFunction->getType();
Importer.ToDiag(FoundFunction->getLocation(),
diag::note_odr_value_here)
<< FoundFunction->getType();
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
DeclarationNameInfo NameInfo(Name, Loc);
// Import additional name location/type info.
ImportDeclarationNameLoc(D->getNameInfo(), NameInfo);
QualType FromTy = D->getType();
bool usedDifferentExceptionSpec = false;
if (const FunctionProtoType *
FromFPT = D->getType()->getAs<FunctionProtoType>()) {
FunctionProtoType::ExtProtoInfo FromEPI = FromFPT->getExtProtoInfo();
// FunctionProtoType::ExtProtoInfo's ExceptionSpecDecl can point to the
// FunctionDecl that we are importing the FunctionProtoType for.
// To avoid an infinite recursion when importing, create the FunctionDecl
// with a simplified function type and update it afterwards.
if (FromEPI.ExceptionSpec.SourceDecl ||
FromEPI.ExceptionSpec.SourceTemplate ||
FromEPI.ExceptionSpec.NoexceptExpr) {
FunctionProtoType::ExtProtoInfo DefaultEPI;
FromTy = Importer.getFromContext().getFunctionType(
FromFPT->getReturnType(), FromFPT->getParamTypes(), DefaultEPI);
usedDifferentExceptionSpec = true;
}
}
// Import the type.
QualType T = Importer.Import(FromTy);
if (T.isNull())
return nullptr;
// Import the function parameters.
SmallVector<ParmVarDecl *, 8> Parameters;
for (auto P : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(P));
if (!ToP)
return nullptr;
Parameters.push_back(ToP);
}
// Create the imported function.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
FunctionDecl *ToFunction = nullptr;
SourceLocation InnerLocStart = Importer.Import(D->getInnerLocStart());
if (CXXConstructorDecl *FromConstructor = dyn_cast<CXXConstructorDecl>(D)) {
ToFunction = CXXConstructorDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
FromConstructor->isExplicit(),
D->isInlineSpecified(),
D->isImplicit(),
D->isConstexpr());
if (unsigned NumInitializers = FromConstructor->getNumCtorInitializers()) {
SmallVector<CXXCtorInitializer *, 4> CtorInitializers;
for (CXXCtorInitializer *I : FromConstructor->inits()) {
CXXCtorInitializer *ToI =
cast_or_null<CXXCtorInitializer>(Importer.Import(I));
if (!ToI && I)
return nullptr;
CtorInitializers.push_back(ToI);
}
CXXCtorInitializer **Memory =
new (Importer.getToContext()) CXXCtorInitializer *[NumInitializers];
std::copy(CtorInitializers.begin(), CtorInitializers.end(), Memory);
CXXConstructorDecl *ToCtor = llvm::cast<CXXConstructorDecl>(ToFunction);
ToCtor->setCtorInitializers(Memory);
ToCtor->setNumCtorInitializers(NumInitializers);
}
} else if (isa<CXXDestructorDecl>(D)) {
ToFunction = CXXDestructorDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
D->isInlineSpecified(),
D->isImplicit());
} else if (CXXConversionDecl *FromConversion
= dyn_cast<CXXConversionDecl>(D)) {
ToFunction = CXXConversionDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
D->isInlineSpecified(),
FromConversion->isExplicit(),
D->isConstexpr(),
Importer.Import(D->getLocEnd()));
} else if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
ToFunction = CXXMethodDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
Method->getStorageClass(),
Method->isInlineSpecified(),
D->isConstexpr(),
Importer.Import(D->getLocEnd()));
} else {
ToFunction = FunctionDecl::Create(Importer.getToContext(), DC,
InnerLocStart,
NameInfo, T, TInfo, D->getStorageClass(),
D->isInlineSpecified(),
D->hasWrittenPrototype(),
D->isConstexpr());
}
// Import the qualifier, if any.
ToFunction->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
ToFunction->setAccess(D->getAccess());
ToFunction->setLexicalDeclContext(LexicalDC);
ToFunction->setVirtualAsWritten(D->isVirtualAsWritten());
ToFunction->setTrivial(D->isTrivial());
ToFunction->setPure(D->isPure());
Importer.Imported(D, ToFunction);
// Set the parameters.
for (unsigned I = 0, N = Parameters.size(); I != N; ++I) {
Parameters[I]->setOwningFunction(ToFunction);
ToFunction->addDeclInternal(Parameters[I]);
}
ToFunction->setParams(Parameters);
if (usedDifferentExceptionSpec) {
// Update FunctionProtoType::ExtProtoInfo.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
ToFunction->setType(T);
}
// Import the body, if any.
if (Stmt *FromBody = D->getBody()) {
if (Stmt *ToBody = Importer.Import(FromBody)) {
ToFunction->setBody(ToBody);
}
}
// FIXME: Other bits to merge?
// Add this function to the lexical context.
LexicalDC->addDeclInternal(ToFunction);
if (auto *FromCXXMethod = dyn_cast<CXXMethodDecl>(D))
ImportOverrides(cast<CXXMethodDecl>(ToFunction), FromCXXMethod);
return ToFunction;
}
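// C++ methods, constructors, destructors, and conversion functions all
// funnel through the generic function-import path above.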
Decl *ASTNodeImporter::VisitCXXMethodDecl(CXXMethodDecl *D) {
return VisitFunctionDecl(D);
}
Decl *ASTNodeImporter::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *ASTNodeImporter::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *ASTNodeImporter::VisitCXXConversionDecl(CXXConversionDecl *D) {
return VisitCXXMethodDecl(D);
}
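// Returns the 1-based position of F among the field-like declarations of its
// owning record (0 if F is not in a record at all). Anonymous fields have no
// name to look up, so the importer matches them across translation units by
// this index.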
static unsigned getFieldIndex(Decl *F) {
RecordDecl *Owner = dyn_cast<RecordDecl>(F->getDeclContext());
if (!Owner)
return 0;
unsigned Index = 1;
for (const auto *D : Owner->noload_decls()) {
if (D == F)
return Index;
if (isa<FieldDecl>(*D) || isa<IndirectFieldDecl>(*D))
++Index;
}
return Index;
}
Decl *ASTNodeImporter::VisitFieldDecl(FieldDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this field.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (FieldDecl *FoundField = dyn_cast<FieldDecl>(FoundDecls[I])) {
// For anonymous fields, match up by index.
if (!Name && getFieldIndex(D) != getFieldIndex(FoundField))
continue;
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundField->getType())) {
Importer.Imported(D, FoundField);
return FoundField;
}
Importer.ToDiag(Loc, diag::err_odr_field_type_inconsistent)
<< Name << D->getType() << FoundField->getType();
Importer.ToDiag(FoundField->getLocation(), diag::note_odr_value_here)
<< FoundField->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
Expr *BitWidth = Importer.Import(D->getBitWidth());
if (!BitWidth && D->getBitWidth())
return nullptr;
FieldDecl *ToField = FieldDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, BitWidth, D->isMutable(),
D->getInClassInitStyle());
ToField->setAccess(D->getAccess());
ToField->setLexicalDeclContext(LexicalDC);
if (Expr *FromInitializer = D->getInClassInitializer()) {
Expr *ToInitializer = Importer.Import(FromInitializer);
if (ToInitializer)
ToField->setInClassInitializer(ToInitializer);
else
return nullptr;
}
ToField->setImplicit(D->isImplicit());
Importer.Imported(D, ToField);
LexicalDC->addDeclInternal(ToField);
return ToField;
}
Decl *ASTNodeImporter::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this field.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (IndirectFieldDecl *FoundField
= dyn_cast<IndirectFieldDecl>(FoundDecls[I])) {
// For anonymous indirect fields, match up by index.
if (!Name && getFieldIndex(D) != getFieldIndex(FoundField))
continue;
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundField->getType(),
!Name.isEmpty())) {
Importer.Imported(D, FoundField);
return FoundField;
}
// If there are more anonymous fields to check, continue.
if (!Name && I < N-1)
continue;
Importer.ToDiag(Loc, diag::err_odr_field_type_inconsistent)
<< Name << D->getType() << FoundField->getType();
Importer.ToDiag(FoundField->getLocation(), diag::note_odr_value_here)
<< FoundField->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
NamedDecl **NamedChain =
new (Importer.getToContext()) NamedDecl *[D->getChainingSize()];
unsigned i = 0;
for (auto *PI : D->chain()) {
Decl *ToChainD = Importer.Import(PI);
if (!ToChainD)
return nullptr;
NamedChain[i++] = cast<NamedDecl>(ToChainD);
}
IndirectFieldDecl *ToIndirectField = IndirectFieldDecl::Create(
Importer.getToContext(), DC, Loc, Name.getAsIdentifierInfo(), T,
{NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
ToIndirectField->addAttr(Attr->clone(Importer.getToContext()));
ToIndirectField->setAccess(D->getAccess());
ToIndirectField->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToIndirectField);
LexicalDC->addDeclInternal(ToIndirectField);
return ToIndirectField;
}
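// Friend declarations are not NamedDecls, so they are deduplicated by
// walking the destination record's friend chain and checking structural
// equivalence of the friended decl or type; trailing template parameter
// lists are imported along with the friend.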
Decl *ASTNodeImporter::VisitFriendDecl(FriendDecl *D) {
// Import the major distinguishing characteristics of a declaration.
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
DeclContext *LexicalDC = D->getDeclContext() == D->getLexicalDeclContext()
? DC : Importer.ImportContext(D->getLexicalDeclContext());
if (!DC || !LexicalDC)
return nullptr;
// Determine whether we've already imported this decl.
// FriendDecl is not a NamedDecl so we cannot use localUncachedLookup.
auto *RD = cast<CXXRecordDecl>(DC);
FriendDecl *ImportedFriend = RD->getFirstFriend();
StructuralEquivalenceContext Context(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), false, false);
while (ImportedFriend) {
if (D->getFriendDecl() && ImportedFriend->getFriendDecl()) {
if (Context.IsStructurallyEquivalent(D->getFriendDecl(),
ImportedFriend->getFriendDecl()))
return Importer.Imported(D, ImportedFriend);
} else if (D->getFriendType() && ImportedFriend->getFriendType()) {
if (Importer.IsStructurallyEquivalent(
D->getFriendType()->getType(),
ImportedFriend->getFriendType()->getType(), true))
return Importer.Imported(D, ImportedFriend);
}
ImportedFriend = ImportedFriend->getNextFriend();
}
// Not found. Create it.
FriendDecl::FriendUnion ToFU;
if (NamedDecl *FriendD = D->getFriendDecl())
ToFU = cast_or_null<NamedDecl>(Importer.Import(FriendD));
else
ToFU = Importer.Import(D->getFriendType());
if (!ToFU)
return nullptr;
SmallVector<TemplateParameterList *, 1> ToTPLists(D->NumTPLists);
TemplateParameterList **FromTPLists =
D->getTrailingObjects<TemplateParameterList *>();
for (unsigned I = 0; I < D->NumTPLists; I++) {
TemplateParameterList *List = ImportTemplateParameterList(FromTPLists[I]);
if (!List)
return nullptr;
ToTPLists[I] = List;
}
FriendDecl *FrD = FriendDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocation()),
ToFU, Importer.Import(D->getFriendLoc()),
ToTPLists);
Importer.Imported(D, FrD);
RD->pushFriendDecl(FrD);
FrD->setAccess(D->getAccess());
FrD->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(FrD);
return FrD;
}
Decl *ASTNodeImporter::VisitObjCIvarDecl(ObjCIvarDecl *D) {
// Import the major distinguishing characteristics of an ivar.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this ivar
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCIvarDecl *FoundIvar = dyn_cast<ObjCIvarDecl>(FoundDecls[I])) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundIvar->getType())) {
Importer.Imported(D, FoundIvar);
return FoundIvar;
}
Importer.ToDiag(Loc, diag::err_odr_ivar_type_inconsistent)
<< Name << D->getType() << FoundIvar->getType();
Importer.ToDiag(FoundIvar->getLocation(), diag::note_odr_value_here)
<< FoundIvar->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
Expr *BitWidth = Importer.Import(D->getBitWidth());
if (!BitWidth && D->getBitWidth())
return nullptr;
ObjCIvarDecl *ToIvar = ObjCIvarDecl::Create(Importer.getToContext(),
cast<ObjCContainerDecl>(DC),
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, D->getAccessControl(),
BitWidth, D->getSynthesize());
ToIvar->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToIvar);
LexicalDC->addDeclInternal(ToIvar);
return ToIvar;
}
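// Variable import: file-scope variables with external formal linkage are
// merged with an equivalent existing declaration. An incomplete array type
// on either side is reconciled with the constant array type on the other
// (tentative-definition style), and duplicate definitions with initializers
// are diagnosed as ODR violations.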
Decl *ASTNodeImporter::VisitVarDecl(VarDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Try to find a variable in our own ("to") context with the same name and
// in the same context as the variable we're importing.
if (D->isFileVarDecl()) {
VarDecl *MergeWithVar = nullptr;
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (VarDecl *FoundVar = dyn_cast<VarDecl>(FoundDecls[I])) {
// We have found a variable that we may need to merge with. Check it.
if (FoundVar->hasExternalFormalLinkage() &&
D->hasExternalFormalLinkage()) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundVar->getType())) {
MergeWithVar = FoundVar;
break;
}
const ArrayType *FoundArray
= Importer.getToContext().getAsArrayType(FoundVar->getType());
const ArrayType *TArray
= Importer.getToContext().getAsArrayType(D->getType());
if (FoundArray && TArray) {
if (isa<IncompleteArrayType>(FoundArray) &&
isa<ConstantArrayType>(TArray)) {
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
FoundVar->setType(T);
MergeWithVar = FoundVar;
break;
} else if (isa<IncompleteArrayType>(TArray) &&
isa<ConstantArrayType>(FoundArray)) {
MergeWithVar = FoundVar;
break;
}
}
Importer.ToDiag(Loc, diag::err_odr_variable_type_inconsistent)
<< Name << D->getType() << FoundVar->getType();
Importer.ToDiag(FoundVar->getLocation(), diag::note_odr_value_here)
<< FoundVar->getType();
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (MergeWithVar) {
// An equivalent variable with external linkage has been found. Link
// the two declarations, then merge them.
Importer.Imported(D, MergeWithVar);
if (VarDecl *DDef = D->getDefinition()) {
if (VarDecl *ExistingDef = MergeWithVar->getDefinition()) {
Importer.ToDiag(ExistingDef->getLocation(),
diag::err_odr_variable_multiple_def)
<< Name;
Importer.FromDiag(DDef->getLocation(), diag::note_odr_defined_here);
} else {
Expr *Init = Importer.Import(DDef->getInit());
MergeWithVar->setInit(Init);
if (DDef->isInitKnownICE()) {
EvaluatedStmt *Eval = MergeWithVar->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = DDef->isInitICE();
}
}
}
return MergeWithVar;
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported variable.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
VarDecl *ToVar = VarDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo,
D->getStorageClass());
ToVar->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
ToVar->setAccess(D->getAccess());
ToVar->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToVar);
LexicalDC->addDeclInternal(ToVar);
if (!D->isFileVarDecl() &&
D->isUsed())
ToVar->setIsUsed();
// Merge the initializer.
if (ImportDefinition(D, ToVar))
return nullptr;
if (D->isConstexpr())
ToVar->setConstexpr(true);
return ToVar;
}
Decl *ASTNodeImporter::VisitImplicitParamDecl(ImplicitParamDecl *D) {
// Parameters are created in the translation unit's context, then moved
// into the function declaration's context afterward.
DeclContext *DC = Importer.getToContext().getTranslationUnitDecl();
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the parameter's type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported parameter.
auto *ToParm = ImplicitParamDecl::Create(Importer.getToContext(), DC, Loc,
Name.getAsIdentifierInfo(), T,
D->getParameterKind());
return Importer.Imported(D, ToParm);
}
Decl *ASTNodeImporter::VisitParmVarDecl(ParmVarDecl *D) {
// Parameters are created in the translation unit's context, then moved
// into the function declaration's context afterward.
DeclContext *DC = Importer.getToContext().getTranslationUnitDecl();
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the parameter's type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported parameter.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
ParmVarDecl *ToParm = ParmVarDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, D->getStorageClass(),
/*DefaultArg*/ nullptr);
// Set the default argument.
ToParm->setHasInheritedDefaultArg(D->hasInheritedDefaultArg());
ToParm->setKNRPromoted(D->isKNRPromoted());
Expr *ToDefArg = nullptr;
Expr *FromDefArg = nullptr;
if (D->hasUninstantiatedDefaultArg()) {
FromDefArg = D->getUninstantiatedDefaultArg();
ToDefArg = Importer.Import(FromDefArg);
ToParm->setUninstantiatedDefaultArg(ToDefArg);
} else if (D->hasUnparsedDefaultArg()) {
ToParm->setUnparsedDefaultArg();
} else if (D->hasDefaultArg()) {
FromDefArg = D->getDefaultArg();
ToDefArg = Importer.Import(FromDefArg);
ToParm->setDefaultArg(ToDefArg);
}
if (FromDefArg && !ToDefArg)
return nullptr;
if (D->isUsed())
ToParm->setIsUsed();
return Importer.Imported(D, ToParm);
}
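// Objective-C methods merge with an existing method of the same selector and
// instance/class kind only when the return type, parameter count, parameter
// types, and variadicness all agree; any mismatch is an ODR error, since
// Objective-C has no overloading to fall back on.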
Decl *ASTNodeImporter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
// Import the major distinguishing characteristics of a method.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCMethodDecl *FoundMethod = dyn_cast<ObjCMethodDecl>(FoundDecls[I])) {
if (FoundMethod->isInstanceMethod() != D->isInstanceMethod())
continue;
// Check return types.
if (!Importer.IsStructurallyEquivalent(D->getReturnType(),
FoundMethod->getReturnType())) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_result_type_inconsistent)
<< D->isInstanceMethod() << Name << D->getReturnType()
<< FoundMethod->getReturnType();
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// Check the number of parameters.
if (D->param_size() != FoundMethod->param_size()) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_num_params_inconsistent)
<< D->isInstanceMethod() << Name
<< D->param_size() << FoundMethod->param_size();
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// Check parameter types.
for (ObjCMethodDecl::param_iterator P = D->param_begin(),
PEnd = D->param_end(), FoundP = FoundMethod->param_begin();
P != PEnd; ++P, ++FoundP) {
if (!Importer.IsStructurallyEquivalent((*P)->getType(),
(*FoundP)->getType())) {
Importer.FromDiag((*P)->getLocation(),
diag::err_odr_objc_method_param_type_inconsistent)
<< D->isInstanceMethod() << Name
<< (*P)->getType() << (*FoundP)->getType();
Importer.ToDiag((*FoundP)->getLocation(), diag::note_odr_value_here)
<< (*FoundP)->getType();
return nullptr;
}
}
// Check variadic/non-variadic.
if (D->isVariadic() != FoundMethod->isVariadic()) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_variadic_inconsistent)
<< D->isInstanceMethod() << Name;
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// FIXME: Any other bits we need to merge?
return Importer.Imported(D, FoundMethod);
}
}
// Import the result type.
QualType ResultTy = Importer.Import(D->getReturnType());
if (ResultTy.isNull())
return nullptr;
TypeSourceInfo *ReturnTInfo = Importer.Import(D->getReturnTypeSourceInfo());
ObjCMethodDecl *ToMethod = ObjCMethodDecl::Create(
Importer.getToContext(), Loc, Importer.Import(D->getLocEnd()),
Name.getObjCSelector(), ResultTy, ReturnTInfo, DC, D->isInstanceMethod(),
D->isVariadic(), D->isPropertyAccessor(), D->isImplicit(), D->isDefined(),
D->getImplementationControl(), D->hasRelatedResultType());
// FIXME: When we decide to merge method definitions, we'll need to
// deal with implicit parameters.
// Import the parameters
SmallVector<ParmVarDecl *, 5> ToParams;
for (auto *FromP : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(FromP));
if (!ToP)
return nullptr;
ToParams.push_back(ToP);
}
// Set the parameters.
for (unsigned I = 0, N = ToParams.size(); I != N; ++I) {
ToParams[I]->setOwningFunction(ToMethod);
ToMethod->addDeclInternal(ToParams[I]);
}
SmallVector<SourceLocation, 12> SelLocs;
D->getSelectorLocs(SelLocs);
ToMethod->setMethodParams(Importer.getToContext(), ToParams, SelLocs);
ToMethod->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToMethod);
LexicalDC->addDeclInternal(ToMethod);
return ToMethod;
}
Decl *ASTNodeImporter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
// Import the major distinguishing characteristics of a type parameter.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
TypeSourceInfo *BoundInfo = Importer.Import(D->getTypeSourceInfo());
if (!BoundInfo)
return nullptr;
ObjCTypeParamDecl *Result = ObjCTypeParamDecl::Create(
Importer.getToContext(), DC,
D->getVariance(),
Importer.Import(D->getVarianceLoc()),
D->getIndex(),
Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo(),
Importer.Import(D->getColonLoc()),
BoundInfo);
Importer.Imported(D, Result);
Result->setLexicalDeclContext(LexicalDC);
return Result;
}
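// Categories are keyed by name on their class interface: an existing
// category with the same name is merged with rather than duplicated, and the
// category's protocol list, members, and implementation are imported along
// with it.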
Decl *ASTNodeImporter::VisitObjCCategoryDecl(ObjCCategoryDecl *D) {
// Import the major distinguishing characteristics of a category.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
ObjCInterfaceDecl *ToInterface
= cast_or_null<ObjCInterfaceDecl>(Importer.Import(D->getClassInterface()));
if (!ToInterface)
return nullptr;
// Determine if we've already encountered this category.
ObjCCategoryDecl *MergeWithCategory
= ToInterface->FindCategoryDeclaration(Name.getAsIdentifierInfo());
ObjCCategoryDecl *ToCategory = MergeWithCategory;
if (!ToCategory) {
ToCategory = ObjCCategoryDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getAtStartLoc()),
Loc,
Importer.Import(D->getCategoryNameLoc()),
Name.getAsIdentifierInfo(),
ToInterface,
/*TypeParamList=*/nullptr,
Importer.Import(D->getIvarLBraceLoc()),
Importer.Import(D->getIvarRBraceLoc()));
ToCategory->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToCategory);
Importer.Imported(D, ToCategory);
// Import the type parameter list after calling Imported, to avoid
// loops when bringing in their DeclContext.
ToCategory->setTypeParamList(ImportObjCTypeParamList(
D->getTypeParamList()));
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCCategoryDecl::protocol_loc_iterator FromProtoLoc
= D->protocol_loc_begin();
for (ObjCCategoryDecl::protocol_iterator FromProto = D->protocol_begin(),
FromProtoEnd = D->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return nullptr;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
ToCategory->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
} else {
Importer.Imported(D, ToCategory);
}
// Import all of the members of this category.
ImportDeclContext(D);
// If we have an implementation, import it as well.
if (D->getImplementation()) {
ObjCCategoryImplDecl *Impl
= cast_or_null<ObjCCategoryImplDecl>(
Importer.Import(D->getImplementation()));
if (!Impl)
return nullptr;
ToCategory->setImplementation(Impl);
}
return ToCategory;
}
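// Imports a protocol definition: the referenced protocol list is brought
// over eagerly, while member import is deferred unless the import kind
// forces the whole DeclContext.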
bool ASTNodeImporter::ImportDefinition(ObjCProtocolDecl *From,
ObjCProtocolDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition()) {
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From);
return false;
}
// Start the protocol definition
To->startDefinition();
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCProtocolDecl::protocol_loc_iterator
FromProtoLoc = From->protocol_loc_begin();
for (ObjCProtocolDecl::protocol_iterator FromProto = From->protocol_begin(),
FromProtoEnd = From->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return true;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
To->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
if (shouldForceImportDeclContext(Kind)) {
// Import all of the members of this protocol.
ImportDeclContext(From, /*ForceImport=*/true);
}
return false;
}
Decl *ASTNodeImporter::VisitObjCProtocolDecl(ObjCProtocolDecl *D) {
// If this protocol has a definition in the translation unit we're coming
// from, but this particular declaration is not that definition, import the
// definition and map to that.
ObjCProtocolDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of a protocol.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
ObjCProtocolDecl *MergeWithProtocol = nullptr;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_ObjCProtocol))
continue;
if ((MergeWithProtocol = dyn_cast<ObjCProtocolDecl>(FoundDecls[I])))
break;
}
ObjCProtocolDecl *ToProto = MergeWithProtocol;
if (!ToProto) {
ToProto = ObjCProtocolDecl::Create(Importer.getToContext(), DC,
Name.getAsIdentifierInfo(), Loc,
Importer.Import(D->getAtStartLoc()),
/*PrevDecl=*/nullptr);
ToProto->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToProto);
}
Importer.Imported(D, ToProto);
if (D->isThisDeclarationADefinition() && ImportDefinition(D, ToProto))
return nullptr;
return ToProto;
}
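// Linkage specifications (extern "C" / extern "C++") carry no name, so no
// merging is attempted; brace locations are preserved when the spec has
// braces.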
Decl *ASTNodeImporter::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
DeclContext *LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
SourceLocation ExternLoc = Importer.Import(D->getExternLoc());
SourceLocation LangLoc = Importer.Import(D->getLocation());
bool HasBraces = D->hasBraces();
LinkageSpecDecl *ToLinkageSpec =
LinkageSpecDecl::Create(Importer.getToContext(),
DC,
ExternLoc,
LangLoc,
D->getLanguage(),
HasBraces);
if (HasBraces) {
SourceLocation RBraceLoc = Importer.Import(D->getRBraceLoc());
ToLinkageSpec->setRBraceLoc(RBraceLoc);
}
ToLinkageSpec->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToLinkageSpec);
Importer.Imported(D, ToLinkageSpec);
return ToLinkageSpec;
}
bool ASTNodeImporter::ImportDefinition(ObjCInterfaceDecl *From,
ObjCInterfaceDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition()) {
// Check consistency of superclass.
ObjCInterfaceDecl *FromSuper = From->getSuperClass();
if (FromSuper) {
FromSuper = cast_or_null<ObjCInterfaceDecl>(Importer.Import(FromSuper));
if (!FromSuper)
return true;
}
ObjCInterfaceDecl *ToSuper = To->getSuperClass();
if ((bool)FromSuper != (bool)ToSuper ||
(FromSuper && !declaresSameEntity(FromSuper, ToSuper))) {
Importer.ToDiag(To->getLocation(),
diag::err_odr_objc_superclass_inconsistent)
<< To->getDeclName();
if (ToSuper)
Importer.ToDiag(To->getSuperClassLoc(), diag::note_odr_objc_superclass)
<< To->getSuperClass()->getDeclName();
else
Importer.ToDiag(To->getLocation(),
diag::note_odr_objc_missing_superclass);
if (From->getSuperClass())
Importer.FromDiag(From->getSuperClassLoc(),
diag::note_odr_objc_superclass)
<< From->getSuperClass()->getDeclName();
else
Importer.FromDiag(From->getLocation(),
diag::note_odr_objc_missing_superclass);
}
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From);
return false;
}
// Start the definition.
To->startDefinition();
// If this class has a superclass, import it.
if (From->getSuperClass()) {
TypeSourceInfo *SuperTInfo = Importer.Import(From->getSuperClassTInfo());
if (!SuperTInfo)
return true;
To->setSuperClass(SuperTInfo);
}
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCInterfaceDecl::protocol_loc_iterator
FromProtoLoc = From->protocol_loc_begin();
for (ObjCInterfaceDecl::protocol_iterator FromProto = From->protocol_begin(),
FromProtoEnd = From->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return true;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
To->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
// Import categories. When the categories themselves are imported, they'll
// hook themselves into this interface.
for (auto *Cat : From->known_categories())
Importer.Import(Cat);
// If we have an @implementation, import it as well.
if (From->getImplementation()) {
ObjCImplementationDecl *Impl = cast_or_null<ObjCImplementationDecl>(
Importer.Import(From->getImplementation()));
if (!Impl)
return true;
To->setImplementation(Impl);
}
if (shouldForceImportDeclContext(Kind)) {
// Import all of the members of this class.
ImportDeclContext(From, /*ForceImport=*/true);
}
return false;
}
ObjCTypeParamList *
ASTNodeImporter::ImportObjCTypeParamList(ObjCTypeParamList *list) {
if (!list)
return nullptr;
SmallVector<ObjCTypeParamDecl *, 4> toTypeParams;
for (auto fromTypeParam : *list) {
auto toTypeParam = cast_or_null<ObjCTypeParamDecl>(
Importer.Import(fromTypeParam));
if (!toTypeParam)
return nullptr;
toTypeParams.push_back(toTypeParam);
}
return ObjCTypeParamList::create(Importer.getToContext(),
Importer.Import(list->getLAngleLoc()),
toTypeParams,
Importer.Import(list->getRAngleLoc()));
}
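// Interface import mirrors the record case: declarations are redirected to
// the definition, an existing interface of the same name is merged with, and
// the type parameter list is imported only after the mapping is registered,
// to avoid cycles through its DeclContext.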
Decl *ASTNodeImporter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) {
// If this class has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
ObjCInterfaceDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of an @interface.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Look for an existing interface with the same name.
ObjCInterfaceDecl *MergeWithIface = nullptr;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
if ((MergeWithIface = dyn_cast<ObjCInterfaceDecl>(FoundDecls[I])))
break;
}
// Create an interface declaration, if one does not already exist.
ObjCInterfaceDecl *ToIface = MergeWithIface;
if (!ToIface) {
ToIface = ObjCInterfaceDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getAtStartLoc()),
Name.getAsIdentifierInfo(),
/*TypeParamList=*/nullptr,
/*PrevDecl=*/nullptr, Loc,
D->isImplicitInterfaceDecl());
ToIface->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToIface);
}
Importer.Imported(D, ToIface);
// Import the type parameter list after calling Imported, to avoid
// loops when bringing in their DeclContext.
ToIface->setTypeParamList(ImportObjCTypeParamList(
D->getTypeParamListAsWritten()));
if (D->isThisDeclarationADefinition() && ImportDefinition(D, ToIface))
return nullptr;
return ToIface;
}
Decl *ASTNodeImporter::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
ObjCCategoryDecl *Category = cast_or_null<ObjCCategoryDecl>(
Importer.Import(D->getCategoryDecl()));
if (!Category)
return nullptr;
ObjCCategoryImplDecl *ToImpl = Category->getImplementation();
if (!ToImpl) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
SourceLocation CategoryNameLoc = Importer.Import(D->getCategoryNameLoc());
ToImpl = ObjCCategoryImplDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getIdentifier()),
Category->getClassInterface(),
Importer.Import(D->getLocation()),
Importer.Import(D->getAtStartLoc()),
CategoryNameLoc);
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
ToImpl->setLexicalDeclContext(LexicalDC);
}
LexicalDC->addDeclInternal(ToImpl);
Category->setImplementation(ToImpl);
}
Importer.Imported(D, ToImpl);
ImportDeclContext(D);
return ToImpl;
}
Decl *ASTNodeImporter::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
// Find the corresponding interface.
ObjCInterfaceDecl *Iface = cast_or_null<ObjCInterfaceDecl>(
Importer.Import(D->getClassInterface()));
if (!Iface)
return nullptr;
// Import the superclass, if any.
ObjCInterfaceDecl *Super = nullptr;
if (D->getSuperClass()) {
Super = cast_or_null<ObjCInterfaceDecl>(
Importer.Import(D->getSuperClass()));
if (!Super)
return nullptr;
}
ObjCImplementationDecl *Impl = Iface->getImplementation();
if (!Impl) {
// We haven't imported an implementation yet. Create a new @implementation
// now.
Impl = ObjCImplementationDecl::Create(Importer.getToContext(),
Importer.ImportContext(D->getDeclContext()),
Iface, Super,
Importer.Import(D->getLocation()),
Importer.Import(D->getAtStartLoc()),
Importer.Import(D->getSuperClassLoc()),
Importer.Import(D->getIvarLBraceLoc()),
Importer.Import(D->getIvarRBraceLoc()));
if (D->getDeclContext() != D->getLexicalDeclContext()) {
DeclContext *LexicalDC
= Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
Impl->setLexicalDeclContext(LexicalDC);
}
// Associate the implementation with the class it implements.
Iface->setImplementation(Impl);
Importer.Imported(D, Iface->getImplementation());
} else {
Importer.Imported(D, Iface->getImplementation());
// Verify that the existing @implementation has the same superclass.
if ((Super && !Impl->getSuperClass()) ||
(!Super && Impl->getSuperClass()) ||
(Super && Impl->getSuperClass() &&
!declaresSameEntity(Super->getCanonicalDecl(),
Impl->getSuperClass()))) {
Importer.ToDiag(Impl->getLocation(),
diag::err_odr_objc_superclass_inconsistent)
<< Iface->getDeclName();
// FIXME: It would be nice to have the location of the superclass
// below.
if (Impl->getSuperClass())
Importer.ToDiag(Impl->getLocation(),
diag::note_odr_objc_superclass)
<< Impl->getSuperClass()->getDeclName();
else
Importer.ToDiag(Impl->getLocation(),
diag::note_odr_objc_missing_superclass);
if (D->getSuperClass())
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_superclass)
<< D->getSuperClass()->getDeclName();
else
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_missing_superclass);
return nullptr;
}
}
// Import all of the members of this @implementation.
ImportDeclContext(D);
return Impl;
}
Decl *ASTNodeImporter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
// Import the major distinguishing characteristics of an @property.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Check whether we have already imported this property.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCPropertyDecl *FoundProp
= dyn_cast<ObjCPropertyDecl>(FoundDecls[I])) {
// Check property types.
if (!Importer.IsStructurallyEquivalent(D->getType(),
FoundProp->getType())) {
Importer.ToDiag(Loc, diag::err_odr_objc_property_type_inconsistent)
<< Name << D->getType() << FoundProp->getType();
Importer.ToDiag(FoundProp->getLocation(), diag::note_odr_value_here)
<< FoundProp->getType();
return nullptr;
}
// FIXME: Check property attributes, getters, setters, etc.?
// Consider these properties to be equivalent.
Importer.Imported(D, FoundProp);
return FoundProp;
}
}
// Import the type.
TypeSourceInfo *TSI = Importer.Import(D->getTypeSourceInfo());
if (!TSI)
return nullptr;
// Create the new property.
ObjCPropertyDecl *ToProperty
= ObjCPropertyDecl::Create(Importer.getToContext(), DC, Loc,
Name.getAsIdentifierInfo(),
Importer.Import(D->getAtLoc()),
Importer.Import(D->getLParenLoc()),
Importer.Import(D->getType()),
TSI,
D->getPropertyImplementation());
Importer.Imported(D, ToProperty);
ToProperty->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToProperty);
ToProperty->setPropertyAttributes(D->getPropertyAttributes());
ToProperty->setPropertyAttributesAsWritten(
D->getPropertyAttributesAsWritten());
ToProperty->setGetterName(Importer.Import(D->getGetterName()),
Importer.Import(D->getGetterNameLoc()));
ToProperty->setSetterName(Importer.Import(D->getSetterName()),
Importer.Import(D->getSetterNameLoc()));
ToProperty->setGetterMethodDecl(
cast_or_null<ObjCMethodDecl>(Importer.Import(D->getGetterMethodDecl())));
ToProperty->setSetterMethodDecl(
cast_or_null<ObjCMethodDecl>(Importer.Import(D->getSetterMethodDecl())));
ToProperty->setPropertyIvarDecl(
cast_or_null<ObjCIvarDecl>(Importer.Import(D->getPropertyIvarDecl())));
return ToProperty;
}
Decl *ASTNodeImporter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
ObjCPropertyDecl *Property = cast_or_null<ObjCPropertyDecl>(
Importer.Import(D->getPropertyDecl()));
if (!Property)
return nullptr;
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
// Import the lexical declaration context.
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
ObjCImplDecl *InImpl = dyn_cast<ObjCImplDecl>(LexicalDC);
if (!InImpl)
return nullptr;
// Import the ivar (for an @synthesize).
ObjCIvarDecl *Ivar = nullptr;
if (D->getPropertyIvarDecl()) {
Ivar = cast_or_null<ObjCIvarDecl>(
Importer.Import(D->getPropertyIvarDecl()));
if (!Ivar)
return nullptr;
}
ObjCPropertyImplDecl *ToImpl
= InImpl->FindPropertyImplDecl(Property->getIdentifier(),
Property->getQueryKind());
if (!ToImpl) {
ToImpl = ObjCPropertyImplDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocStart()),
Importer.Import(D->getLocation()),
Property,
D->getPropertyImplementation(),
Ivar,
Importer.Import(D->getPropertyIvarDeclLoc()));
ToImpl->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToImpl);
LexicalDC->addDeclInternal(ToImpl);
} else {
// Check that we have the same kind of property implementation (@synthesize
// vs. @dynamic).
if (D->getPropertyImplementation() != ToImpl->getPropertyImplementation()) {
Importer.ToDiag(ToImpl->getLocation(),
diag::err_odr_objc_property_impl_kind_inconsistent)
<< Property->getDeclName()
<< (ToImpl->getPropertyImplementation()
== ObjCPropertyImplDecl::Dynamic);
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_property_impl_kind)
<< D->getPropertyDecl()->getDeclName()
<< (D->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic);
return nullptr;
}
// For @synthesize, check that we have the same ivar.
if (D->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize &&
Ivar != ToImpl->getPropertyIvarDecl()) {
Importer.ToDiag(ToImpl->getPropertyIvarDeclLoc(),
diag::err_odr_objc_synthesize_ivar_inconsistent)
<< Property->getDeclName()
<< ToImpl->getPropertyIvarDecl()->getDeclName()
<< Ivar->getDeclName();
Importer.FromDiag(D->getPropertyIvarDeclLoc(),
diag::note_odr_objc_synthesize_ivar_here)
<< D->getPropertyIvarDecl()->getDeclName();
return nullptr;
}
// Merge the existing implementation with the new implementation.
Importer.Imported(D, ToImpl);
}
return ToImpl;
}
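// Template parameters are created with the translation unit as a placeholder
// DeclContext; the enclosing template declaration fixes the context up once
// it is built. Default arguments are not imported yet (see the FIXMEs).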
Decl *ASTNodeImporter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
// For template arguments, we adopt the translation unit as our declaration
// context. This context will be fixed when the actual template declaration
// is created.
// FIXME: Import default argument.
return TemplateTypeParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Importer.Import(D->getLocStart()),
Importer.Import(D->getLocation()),
D->getDepth(),
D->getIndex(),
Importer.Import(D->getIdentifier()),
D->wasDeclaredWithTypename(),
D->isParameterPack());
}
Decl *
ASTNodeImporter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the type of this declaration.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Import type-source information.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
if (D->getTypeSourceInfo() && !TInfo)
return nullptr;
// FIXME: Import default argument.
return NonTypeTemplateParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Importer.Import(D->getInnerLocStart()),
Loc, D->getDepth(), D->getPosition(),
Name.getAsIdentifierInfo(),
T, D->isParameterPack(), TInfo);
}
Decl *
ASTNodeImporter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import template parameters.
TemplateParameterList *TemplateParams
= ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
// FIXME: Import default argument.
return TemplateTemplateParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Loc, D->getDepth(), D->getPosition(),
D->isParameterPack(),
Name.getAsIdentifierInfo(),
TemplateParams);
}
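// Class template import: the templated CXXRecordDecl is imported first,
// which can recursively re-enter this template, hence the explicit
// cyclic-import check before the ClassTemplateDecl itself is created.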
Decl *ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
CXXRecordDecl *Definition
= cast_or_null<CXXRecordDecl>(D->getTemplatedDecl()->getDefinition());
if (Definition && Definition != D->getTemplatedDecl()) {
Decl *ImportedDef
= Importer.Import(Definition->getDescribedClassTemplate());
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this class template.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// We may already have a template of the same name; try to find and match it.
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
Decl *Found = FoundDecls[I];
if (ClassTemplateDecl *FoundTemplate
= dyn_cast<ClassTemplateDecl>(Found)) {
if (IsStructuralMatch(D, FoundTemplate)) {
// The class templates structurally match; call it the same template.
// FIXME: We may be filling in a forward declaration here. Handle
// this case!
Importer.Imported(D->getTemplatedDecl(),
FoundTemplate->getTemplatedDecl());
return Importer.Imported(D, FoundTemplate);
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary,
ConflictingDecls.data(),
ConflictingDecls.size());
}
if (!Name)
return nullptr;
}
CXXRecordDecl *DTemplated = D->getTemplatedDecl();
// Create the declaration that is being templated.
CXXRecordDecl *D2Templated = cast_or_null<CXXRecordDecl>(
Importer.Import(DTemplated));
if (!D2Templated)
return nullptr;
// Resolve possible cyclic import.
if (Decl *AlreadyImported = Importer.GetAlreadyImportedOrNull(D))
return AlreadyImported;
// Create the class template declaration itself.
TemplateParameterList *TemplateParams
= ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
ClassTemplateDecl *D2 = ClassTemplateDecl::Create(Importer.getToContext(), DC,
Loc, Name, TemplateParams,
D2Templated);
D2Templated->setDescribedClassTemplate(D2);
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
// Note the relationship between the class templates.
Importer.Imported(D, D2);
Importer.Imported(DTemplated, D2Templated);
if (DTemplated->isCompleteDefinition() &&
!D2Templated->isCompleteDefinition()) {
// FIXME: Import definition!
}
return D2;
}
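// Specializations are looked up on the imported primary template via
// findSpecialization; partial specializations additionally import their
// as-written template arguments, template parameter list, and injected
// specialization type.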
Decl *ASTNodeImporter::VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
TagDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
ClassTemplateDecl *ClassTemplate
= cast_or_null<ClassTemplateDecl>(Importer.Import(
D->getSpecializedTemplate()));
if (!ClassTemplate)
return nullptr;
// Import the context of this declaration.
DeclContext *DC = ClassTemplate->getDeclContext();
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
// Import the location of this declaration.
SourceLocation StartLoc = Importer.Import(D->getLocStart());
SourceLocation IdLoc = Importer.Import(D->getLocation());
// Import template arguments.
SmallVector<TemplateArgument, 2> TemplateArgs;
if (ImportTemplateArguments(D->getTemplateArgs().data(),
D->getTemplateArgs().size(),
TemplateArgs))
return nullptr;
// Try to find an existing specialization with these template arguments.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *D2
= ClassTemplate->findSpecialization(TemplateArgs, InsertPos);
if (D2) {
// We already have a class template specialization with these template
// arguments.
// FIXME: Check for specialization vs. instantiation errors.
if (RecordDecl *FoundDef = D2->getDefinition()) {
if (!D->isCompleteDefinition() || IsStructuralMatch(D, FoundDef)) {
// The record types structurally match, or the "from" translation
// unit only had a forward declaration anyway; call it the same
// specialization.
return Importer.Imported(D, FoundDef);
}
}
} else {
// Create a new specialization.
if (ClassTemplatePartialSpecializationDecl *PartialSpec =
dyn_cast<ClassTemplatePartialSpecializationDecl>(D)) {
// Import TemplateArgumentListInfo
TemplateArgumentListInfo ToTAInfo;
auto &ASTTemplateArgs = *PartialSpec->getTemplateArgsAsWritten();
for (unsigned I = 0, E = ASTTemplateArgs.NumTemplateArgs; I < E; ++I) {
bool Error = false;
auto ToLoc = ImportTemplateArgumentLoc(ASTTemplateArgs[I], Error);
if (Error)
return nullptr;
ToTAInfo.addArgument(ToLoc);
}
QualType CanonInjType = Importer.Import(
PartialSpec->getInjectedSpecializationType());
if (CanonInjType.isNull())
return nullptr;
CanonInjType = CanonInjType.getCanonicalType();
TemplateParameterList *ToTPList = ImportTemplateParameterList(
PartialSpec->getTemplateParameters());
if (!ToTPList && PartialSpec->getTemplateParameters())
return nullptr;
D2 = ClassTemplatePartialSpecializationDecl::Create(
Importer.getToContext(), D->getTagKind(), DC, StartLoc, IdLoc,
ToTPList, ClassTemplate,
llvm::makeArrayRef(TemplateArgs.data(), TemplateArgs.size()),
ToTAInfo, CanonInjType, nullptr);
} else {
D2 = ClassTemplateSpecializationDecl::Create(Importer.getToContext(),
D->getTagKind(), DC,
StartLoc, IdLoc,
ClassTemplate,
TemplateArgs,
/*PrevDecl=*/nullptr);
}
D2->setSpecializationKind(D->getSpecializationKind());
// Add this specialization to the class template.
ClassTemplate->AddSpecialization(D2, InsertPos);
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
Importer.Imported(D, D2);
if (auto *TSI = D->getTypeAsWritten()) {
TypeSourceInfo *TInfo = Importer.Import(TSI);
if (!TInfo)
return nullptr;
D2->setTypeAsWritten(TInfo);
D2->setTemplateKeywordLoc(Importer.Import(D->getTemplateKeywordLoc()));
D2->setExternLoc(Importer.Import(D->getExternLoc()));
}
SourceLocation POI = Importer.Import(D->getPointOfInstantiation());
if (POI.isValid())
D2->setPointOfInstantiation(POI);
else if (D->getPointOfInstantiation().isValid())
return nullptr;
D2->setTemplateSpecializationKind(D->getTemplateSpecializationKind());
// Add the specialization to this context.
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
}
Importer.Imported(D, D2);
if (D->isCompleteDefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
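// Import a variable template, following the same pattern as class templates:
// map to an existing definition where possible, reuse a structurally
// matching template of the same name, or create a new VarTemplateDecl around
// the imported templated variable.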
Decl *ASTNodeImporter::VisitVarTemplateDecl(VarTemplateDecl *D) {
// If this variable has a definition in the translation unit we're coming
// from, but this particular declaration is not that definition, import the
// definition and map to that.
VarDecl *Definition =
cast_or_null<VarDecl>(D->getTemplatedDecl()->getDefinition());
if (Definition && Definition != D->getTemplatedDecl()) {
Decl *ImportedDef = Importer.Import(Definition->getDescribedVarTemplate());
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this variable template.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// We may already have a template of the same name; try to find and match it.
assert(!DC->isFunctionOrMethod() &&
"Variable templates cannot be declared at function scope");
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
Decl *Found = FoundDecls[I];
if (VarTemplateDecl *FoundTemplate = dyn_cast<VarTemplateDecl>(Found)) {
if (IsStructuralMatch(D, FoundTemplate)) {
// The variable templates structurally match; call it the same template.
Importer.Imported(D->getTemplatedDecl(),
FoundTemplate->getTemplatedDecl());
return Importer.Imported(D, FoundTemplate);
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary,
ConflictingDecls.data(),
ConflictingDecls.size());
}
if (!Name)
return nullptr;
VarDecl *DTemplated = D->getTemplatedDecl();
// Import the type.
QualType T = Importer.Import(DTemplated->getType());
if (T.isNull())
return nullptr;
// Create the declaration that is being templated.
SourceLocation StartLoc = Importer.Import(DTemplated->getLocStart());
SourceLocation IdLoc = Importer.Import(DTemplated->getLocation());
TypeSourceInfo *TInfo = Importer.Import(DTemplated->getTypeSourceInfo());
VarDecl *D2Templated = VarDecl::Create(Importer.getToContext(), DC, StartLoc,
IdLoc, Name.getAsIdentifierInfo(), T,
TInfo, DTemplated->getStorageClass());
D2Templated->setAccess(DTemplated->getAccess());
D2Templated->setQualifierInfo(Importer.Import(DTemplated->getQualifierLoc()));
D2Templated->setLexicalDeclContext(LexicalDC);
// Importer.Imported(DTemplated, D2Templated);
// LexicalDC->addDeclInternal(D2Templated);
// Merge the initializer.
if (ImportDefinition(DTemplated, D2Templated))
return nullptr;
// Create the variable template declaration itself.
TemplateParameterList *TemplateParams =
ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
VarTemplateDecl *D2 = VarTemplateDecl::Create(
Importer.getToContext(), DC, Loc, Name, TemplateParams, D2Templated);
D2Templated->setDescribedVarTemplate(D2);
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
// Note the relationship between the variable templates.
Importer.Imported(D, D2);
Importer.Imported(DTemplated, D2Templated);
if (DTemplated->isThisDeclarationADefinition() &&
!D2Templated->isThisDeclarationADefinition()) {
// FIXME: Import definition!
}
return D2;
}
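// Import a variable template specialization: reuse an existing
// specialization with matching template arguments where possible, otherwise
// create one and register it with the variable template.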
Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *D) {
// If this variable has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
VarDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
VarTemplateDecl *VarTemplate = cast_or_null<VarTemplateDecl>(
Importer.Import(D->getSpecializedTemplate()));
if (!VarTemplate)
return nullptr;
// Import the context of this declaration.
DeclContext *DC = VarTemplate->getDeclContext();
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
// Import the location of this declaration.
SourceLocation StartLoc = Importer.Import(D->getLocStart());
SourceLocation IdLoc = Importer.Import(D->getLocation());
// Import template arguments.
SmallVector<TemplateArgument, 2> TemplateArgs;
if (ImportTemplateArguments(D->getTemplateArgs().data(),
D->getTemplateArgs().size(), TemplateArgs))
return nullptr;
// Try to find an existing specialization with these template arguments.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *D2 = VarTemplate->findSpecialization(
TemplateArgs, InsertPos);
if (D2) {
// We already have a variable template specialization with these template
// arguments.
// FIXME: Check for specialization vs. instantiation errors.
if (VarDecl *FoundDef = D2->getDefinition()) {
if (!D->isThisDeclarationADefinition() ||
IsStructuralMatch(D, FoundDef)) {
// The variables structurally match, or the "from" translation
// unit only had a forward declaration anyway; call it the same
// variable.
return Importer.Imported(D, FoundDef);
}
}
} else {
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
// Create a new specialization.
D2 = VarTemplateSpecializationDecl::Create(
Importer.getToContext(), DC, StartLoc, IdLoc, VarTemplate, T, TInfo,
D->getStorageClass(), TemplateArgs);
D2->setSpecializationKind(D->getSpecializationKind());
D2->setTemplateArgsInfo(D->getTemplateArgsInfo());
// Add this specialization to the variable template.
VarTemplate->AddSpecialization(D2, InsertPos);
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
// Add the specialization to this context.
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
}
Importer.Imported(D, D2);
if (D->isThisDeclarationADefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
//----------------------------------------------------------------------------
// Import Statements
//----------------------------------------------------------------------------
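// A note on the error-handling idiom used by the visitors below:
// Importer.Import() returns null both on failure and when the source node
// itself was null, so checks of the form "if (!ToX && S->getX())"
// distinguish a genuine import failure from a legitimately absent child.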
DeclGroupRef ASTNodeImporter::ImportDeclGroup(DeclGroupRef DG) {
if (DG.isNull())
return DeclGroupRef::Create(Importer.getToContext(), nullptr, 0);
size_t NumDecls = DG.end() - DG.begin();
SmallVector<Decl *, 1> ToDecls(NumDecls);
auto &_Importer = this->Importer;
std::transform(DG.begin(), DG.end(), ToDecls.begin(),
[&_Importer](Decl *D) -> Decl * {
return _Importer.Import(D);
});
return DeclGroupRef::Create(Importer.getToContext(),
ToDecls.begin(),
NumDecls);
}
Stmt *ASTNodeImporter::VisitStmt(Stmt *S) {
Importer.FromDiag(S->getLocStart(), diag::err_unsupported_ast_node)
<< S->getStmtClassName();
return nullptr;
}
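// Import an inline-assembly statement. The symbolic operand names, the
// constraint and clobber string literals, and the operand expressions are
// imported in the order GCCAsmStmt expects: outputs first, then inputs.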
Stmt *ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
SmallVector<IdentifierInfo *, 4> Names;
for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I));
// ToII is nullptr when no symbolic name is given for the output operand;
// see ParseStmtAsm::ParseAsmOperandsOpt.
if (!ToII && S->getOutputIdentifier(I))
return nullptr;
Names.push_back(ToII);
}
for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I));
// ToII is nullptr when no symbolic name is given for the input operand;
// see ParseStmtAsm::ParseAsmOperandsOpt.
if (!ToII && S->getInputIdentifier(I))
return nullptr;
Names.push_back(ToII);
}
SmallVector<StringLiteral *, 4> Clobbers;
for (unsigned I = 0, E = S->getNumClobbers(); I != E; I++) {
StringLiteral *Clobber = cast_or_null<StringLiteral>(
Importer.Import(S->getClobberStringLiteral(I)));
if (!Clobber)
return nullptr;
Clobbers.push_back(Clobber);
}
SmallVector<StringLiteral *, 4> Constraints;
for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
StringLiteral *Output = cast_or_null<StringLiteral>(
Importer.Import(S->getOutputConstraintLiteral(I)));
if (!Output)
return nullptr;
Constraints.push_back(Output);
}
for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
StringLiteral *Input = cast_or_null<StringLiteral>(
Importer.Import(S->getInputConstraintLiteral(I)));
if (!Input)
return nullptr;
Constraints.push_back(Input);
}
SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
if (ImportContainerChecked(S->outputs(), Exprs))
return nullptr;
if (ImportArrayChecked(S->inputs(), Exprs.begin() + S->getNumOutputs()))
return nullptr;
StringLiteral *AsmStr = cast_or_null<StringLiteral>(
Importer.Import(S->getAsmString()));
if (!AsmStr)
return nullptr;
return new (Importer.getToContext()) GCCAsmStmt(
Importer.getToContext(),
Importer.Import(S->getAsmLoc()),
S->isSimple(),
S->isVolatile(),
S->getNumOutputs(),
S->getNumInputs(),
Names.data(),
Constraints.data(),
Exprs.data(),
AsmStr,
S->getNumClobbers(),
Clobbers.data(),
Importer.Import(S->getRParenLoc()));
}
Stmt *ASTNodeImporter::VisitDeclStmt(DeclStmt *S) {
DeclGroupRef ToDG = ImportDeclGroup(S->getDeclGroup());
for (Decl *ToD : ToDG) {
if (!ToD)
return nullptr;
}
SourceLocation ToStartLoc = Importer.Import(S->getStartLoc());
SourceLocation ToEndLoc = Importer.Import(S->getEndLoc());
return new (Importer.getToContext()) DeclStmt(ToDG, ToStartLoc, ToEndLoc);
}
Stmt *ASTNodeImporter::VisitNullStmt(NullStmt *S) {
SourceLocation ToSemiLoc = Importer.Import(S->getSemiLoc());
return new (Importer.getToContext()) NullStmt(ToSemiLoc,
S->hasLeadingEmptyMacro());
}
Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {
llvm::SmallVector<Stmt *, 8> ToStmts(S->size());
if (ImportContainerChecked(S->body(), ToStmts))
return nullptr;
SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
ToStmts,
ToLBraceLoc, ToRBraceLoc);
}
Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) {
Expr *ToLHS = Importer.Import(S->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(S->getRHS());
if (!ToRHS && S->getRHS())
return nullptr;
SourceLocation ToCaseLoc = Importer.Import(S->getCaseLoc());
SourceLocation ToEllipsisLoc = Importer.Import(S->getEllipsisLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
return new (Importer.getToContext()) CaseStmt(ToLHS, ToRHS,
ToCaseLoc, ToEllipsisLoc,
ToColonLoc);
}
Stmt *ASTNodeImporter::VisitDefaultStmt(DefaultStmt *S) {
SourceLocation ToDefaultLoc = Importer.Import(S->getDefaultLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) DefaultStmt(ToDefaultLoc, ToColonLoc,
ToSubStmt);
}
Stmt *ASTNodeImporter::VisitLabelStmt(LabelStmt *S) {
SourceLocation ToIdentLoc = Importer.Import(S->getIdentLoc());
LabelDecl *ToLabelDecl =
cast_or_null<LabelDecl>(Importer.Import(S->getDecl()));
if (!ToLabelDecl && S->getDecl())
return nullptr;
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) LabelStmt(ToIdentLoc, ToLabelDecl,
ToSubStmt);
}
Stmt *ASTNodeImporter::VisitAttributedStmt(AttributedStmt *S) {
SourceLocation ToAttrLoc = Importer.Import(S->getAttrLoc());
ArrayRef<const Attr*> FromAttrs(S->getAttrs());
SmallVector<const Attr *, 1> ToAttrs(FromAttrs.size());
ASTContext &_ToContext = Importer.getToContext();
std::transform(FromAttrs.begin(), FromAttrs.end(), ToAttrs.begin(),
[&_ToContext](const Attr *A) -> const Attr * {
return A->clone(_ToContext);
});
for (const Attr *ToA : ToAttrs) {
if (!ToA)
return nullptr;
}
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return AttributedStmt::Create(Importer.getToContext(), ToAttrLoc,
ToAttrs, ToSubStmt);
}
Stmt *ASTNodeImporter::VisitIfStmt(IfStmt *S) {
SourceLocation ToIfLoc = Importer.Import(S->getIfLoc());
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
Stmt *ToThenStmt = Importer.Import(S->getThen());
if (!ToThenStmt && S->getThen())
return nullptr;
SourceLocation ToElseLoc = Importer.Import(S->getElseLoc());
Stmt *ToElseStmt = Importer.Import(S->getElse());
if (!ToElseStmt && S->getElse())
return nullptr;
return new (Importer.getToContext()) IfStmt(Importer.getToContext(),
ToIfLoc, S->isConstexpr(),
ToInit,
ToConditionVariable,
ToCondition, ToThenStmt,
ToElseLoc, ToElseStmt);
}
Stmt *ASTNodeImporter::VisitSwitchStmt(SwitchStmt *S) {
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
SwitchStmt *ToStmt = new (Importer.getToContext()) SwitchStmt(
Importer.getToContext(), ToInit,
ToConditionVariable, ToCondition);
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
ToStmt->setBody(ToBody);
ToStmt->setSwitchLoc(Importer.Import(S->getSwitchLoc()));
// Now we have to re-chain the cases: a SwitchStmt threads its cases through
// a singly linked list (setNextSwitchCase), so the list is rebuilt here for
// the imported cases in their original order.
SwitchCase *LastChainedSwitchCase = nullptr;
for (SwitchCase *SC = S->getSwitchCaseList(); SC != nullptr;
SC = SC->getNextSwitchCase()) {
SwitchCase *ToSC = dyn_cast_or_null<SwitchCase>(Importer.Import(SC));
if (!ToSC)
return nullptr;
if (LastChainedSwitchCase)
LastChainedSwitchCase->setNextSwitchCase(ToSC);
else
ToStmt->setSwitchCaseList(ToSC);
LastChainedSwitchCase = ToSC;
}
return ToStmt;
}
Stmt *ASTNodeImporter::VisitWhileStmt(WhileStmt *S) {
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
return new (Importer.getToContext()) WhileStmt(Importer.getToContext(),
ToConditionVariable,
ToCondition, ToBody,
ToWhileLoc);
}
Stmt *ASTNodeImporter::VisitDoStmt(DoStmt *S) {
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
SourceLocation ToDoLoc = Importer.Import(S->getDoLoc());
SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) DoStmt(ToBody, ToCondition,
ToDoLoc, ToWhileLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitForStmt(ForStmt *S) {
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToInc = Importer.Import(S->getInc());
if (!ToInc && S->getInc())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToLParenLoc = Importer.Import(S->getLParenLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) ForStmt(Importer.getToContext(),
ToInit, ToCondition,
ToConditionVariable,
ToInc, ToBody,
ToForLoc, ToLParenLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitGotoStmt(GotoStmt *S) {
LabelDecl *ToLabel = nullptr;
if (LabelDecl *FromLabel = S->getLabel()) {
ToLabel = dyn_cast_or_null<LabelDecl>(Importer.Import(FromLabel));
if (!ToLabel)
return nullptr;
}
SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
SourceLocation ToLabelLoc = Importer.Import(S->getLabelLoc());
return new (Importer.getToContext()) GotoStmt(ToLabel,
ToGotoLoc, ToLabelLoc);
}
Stmt *ASTNodeImporter::VisitIndirectGotoStmt(IndirectGotoStmt *S) {
SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
SourceLocation ToStarLoc = Importer.Import(S->getStarLoc());
Expr *ToTarget = Importer.Import(S->getTarget());
if (!ToTarget && S->getTarget())
return nullptr;
return new (Importer.getToContext()) IndirectGotoStmt(ToGotoLoc, ToStarLoc,
ToTarget);
}
Stmt *ASTNodeImporter::VisitContinueStmt(ContinueStmt *S) {
SourceLocation ToContinueLoc = Importer.Import(S->getContinueLoc());
return new (Importer.getToContext()) ContinueStmt(ToContinueLoc);
}
Stmt *ASTNodeImporter::VisitBreakStmt(BreakStmt *S) {
SourceLocation ToBreakLoc = Importer.Import(S->getBreakLoc());
return new (Importer.getToContext()) BreakStmt(ToBreakLoc);
}
Stmt *ASTNodeImporter::VisitReturnStmt(ReturnStmt *S) {
SourceLocation ToRetLoc = Importer.Import(S->getReturnLoc());
Expr *ToRetExpr = Importer.Import(S->getRetValue());
if (!ToRetExpr && S->getRetValue())
return nullptr;
VarDecl *NRVOCandidate = const_cast<VarDecl*>(S->getNRVOCandidate());
VarDecl *ToNRVOCandidate = cast_or_null<VarDecl>(Importer.Import(NRVOCandidate));
if (!ToNRVOCandidate && NRVOCandidate)
return nullptr;
return new (Importer.getToContext()) ReturnStmt(ToRetLoc, ToRetExpr,
ToNRVOCandidate);
}
Stmt *ASTNodeImporter::VisitCXXCatchStmt(CXXCatchStmt *S) {
SourceLocation ToCatchLoc = Importer.Import(S->getCatchLoc());
VarDecl *ToExceptionDecl = nullptr;
if (VarDecl *FromExceptionDecl = S->getExceptionDecl()) {
ToExceptionDecl =
dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
if (!ToExceptionDecl)
return nullptr;
}
Stmt *ToHandlerBlock = Importer.Import(S->getHandlerBlock());
if (!ToHandlerBlock && S->getHandlerBlock())
return nullptr;
return new (Importer.getToContext()) CXXCatchStmt(ToCatchLoc,
ToExceptionDecl,
ToHandlerBlock);
}
Stmt *ASTNodeImporter::VisitCXXTryStmt(CXXTryStmt *S) {
SourceLocation ToTryLoc = Importer.Import(S->getTryLoc());
Stmt *ToTryBlock = Importer.Import(S->getTryBlock());
if (!ToTryBlock && S->getTryBlock())
return nullptr;
SmallVector<Stmt *, 1> ToHandlers(S->getNumHandlers());
for (unsigned HI = 0, HE = S->getNumHandlers(); HI != HE; ++HI) {
CXXCatchStmt *FromHandler = S->getHandler(HI);
if (Stmt *ToHandler = Importer.Import(FromHandler))
ToHandlers[HI] = ToHandler;
else
return nullptr;
}
return CXXTryStmt::Create(Importer.getToContext(), ToTryLoc, ToTryBlock,
ToHandlers);
}
Stmt *ASTNodeImporter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
DeclStmt *ToRange =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getRangeStmt()));
if (!ToRange && S->getRangeStmt())
return nullptr;
DeclStmt *ToBegin =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getBeginStmt()));
if (!ToBegin && S->getBeginStmt())
return nullptr;
DeclStmt *ToEnd =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getEndStmt()));
if (!ToEnd && S->getEndStmt())
return nullptr;
Expr *ToCond = Importer.Import(S->getCond());
if (!ToCond && S->getCond())
return nullptr;
Expr *ToInc = Importer.Import(S->getInc());
if (!ToInc && S->getInc())
return nullptr;
DeclStmt *ToLoopVar =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getLoopVarStmt()));
if (!ToLoopVar && S->getLoopVarStmt())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToCoawaitLoc = Importer.Import(S->getCoawaitLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) CXXForRangeStmt(ToRange, ToBegin, ToEnd,
ToCond, ToInc,
ToLoopVar, ToBody,
ToForLoc, ToCoawaitLoc,
ToColonLoc, ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) {
Stmt *ToElem = Importer.Import(S->getElement());
if (!ToElem && S->getElement())
return nullptr;
Expr *ToCollect = Importer.Import(S->getCollection());
if (!ToCollect && S->getCollection())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) ObjCForCollectionStmt(ToElem,
ToCollect,
ToBody, ToForLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
SourceLocation ToAtCatchLoc = Importer.Import(S->getAtCatchLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
VarDecl *ToExceptionDecl = nullptr;
if (VarDecl *FromExceptionDecl = S->getCatchParamDecl()) {
ToExceptionDecl =
dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
if (!ToExceptionDecl)
return nullptr;
}
Stmt *ToBody = Importer.Import(S->getCatchBody());
if (!ToBody && S->getCatchBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtCatchStmt(ToAtCatchLoc,
ToRParenLoc,
ToExceptionDecl,
ToBody);
}
Stmt *ASTNodeImporter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
SourceLocation ToAtFinallyLoc = Importer.Import(S->getAtFinallyLoc());
Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyBody());
if (!ToAtFinallyStmt && S->getFinallyBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtFinallyStmt(ToAtFinallyLoc,
ToAtFinallyStmt);
}
Stmt *ASTNodeImporter::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
SourceLocation ToAtTryLoc = Importer.Import(S->getAtTryLoc());
Stmt *ToAtTryStmt = Importer.Import(S->getTryBody());
if (!ToAtTryStmt && S->getTryBody())
return nullptr;
SmallVector<Stmt *, 1> ToCatchStmts(S->getNumCatchStmts());
for (unsigned CI = 0, CE = S->getNumCatchStmts(); CI != CE; ++CI) {
ObjCAtCatchStmt *FromCatchStmt = S->getCatchStmt(CI);
if (Stmt *ToCatchStmt = Importer.Import(FromCatchStmt))
ToCatchStmts[CI] = ToCatchStmt;
else
return nullptr;
}
Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyStmt());
if (!ToAtFinallyStmt && S->getFinallyStmt())
return nullptr;
return ObjCAtTryStmt::Create(Importer.getToContext(),
ToAtTryLoc, ToAtTryStmt,
ToCatchStmts.begin(), ToCatchStmts.size(),
ToAtFinallyStmt);
}
Stmt *ASTNodeImporter::VisitObjCAtSynchronizedStmt
(ObjCAtSynchronizedStmt *S) {
SourceLocation ToAtSynchronizedLoc =
Importer.Import(S->getAtSynchronizedLoc());
Expr *ToSynchExpr = Importer.Import(S->getSynchExpr());
if (!ToSynchExpr && S->getSynchExpr())
return nullptr;
Stmt *ToSynchBody = Importer.Import(S->getSynchBody());
if (!ToSynchBody && S->getSynchBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtSynchronizedStmt(
ToAtSynchronizedLoc, ToSynchExpr, ToSynchBody);
}
Stmt *ASTNodeImporter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
SourceLocation ToAtThrowLoc = Importer.Import(S->getThrowLoc());
Expr *ToThrow = Importer.Import(S->getThrowExpr());
if (!ToThrow && S->getThrowExpr())
return nullptr;
return new (Importer.getToContext()) ObjCAtThrowStmt(ToAtThrowLoc, ToThrow);
}
Stmt *ASTNodeImporter::VisitObjCAutoreleasePoolStmt
(ObjCAutoreleasePoolStmt *S) {
SourceLocation ToAtLoc = Importer.Import(S->getAtLoc());
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) ObjCAutoreleasePoolStmt(ToAtLoc,
ToSubStmt);
}
//----------------------------------------------------------------------------
// Import Expressions
//----------------------------------------------------------------------------
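// VisitExpr is the fallback for expression kinds without a dedicated
// visitor: it emits an "unsupported AST node" diagnostic and fails the
// import.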
Expr *ASTNodeImporter::VisitExpr(Expr *E) {
Importer.FromDiag(E->getLocStart(), diag::err_unsupported_ast_node)
<< E->getStmtClassName();
return nullptr;
}
Expr *ASTNodeImporter::VisitVAArgExpr(VAArgExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr && E->getSubExpr())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getWrittenTypeInfo());
if (!TInfo)
return nullptr;
return new (Importer.getToContext()) VAArgExpr(
Importer.Import(E->getBuiltinLoc()), SubExpr, TInfo,
Importer.Import(E->getRParenLoc()), T, E->isMicrosoftABI());
}
Expr *ASTNodeImporter::VisitGNUNullExpr(GNUNullExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) GNUNullExpr(
T, Importer.Import(E->getLocStart()));
}
Expr *ASTNodeImporter::VisitPredefinedExpr(PredefinedExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
StringLiteral *SL = cast_or_null<StringLiteral>(
Importer.Import(E->getFunctionName()));
if (!SL && E->getFunctionName())
return nullptr;
return new (Importer.getToContext()) PredefinedExpr(
Importer.Import(E->getLocStart()), T, E->getIdentType(), SL);
}
Expr *ASTNodeImporter::VisitDeclRefExpr(DeclRefExpr *E) {
ValueDecl *ToD = cast_or_null<ValueDecl>(Importer.Import(E->getDecl()));
if (!ToD)
return nullptr;
NamedDecl *FoundD = nullptr;
if (E->getDecl() != E->getFoundDecl()) {
FoundD = cast_or_null<NamedDecl>(Importer.Import(E->getFoundDecl()));
if (!FoundD)
return nullptr;
}
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TemplateArgumentListInfo ToTAInfo;
TemplateArgumentListInfo *ResInfo = nullptr;
if (E->hasExplicitTemplateArgs()) {
for (const auto &FromLoc : E->template_arguments()) {
bool Error = false;
TemplateArgumentLoc ToTALoc = ImportTemplateArgumentLoc(FromLoc, Error);
if (Error)
return nullptr;
ToTAInfo.addArgument(ToTALoc);
}
ResInfo = &ToTAInfo;
}
DeclRefExpr *DRE = DeclRefExpr::Create(Importer.getToContext(),
Importer.Import(E->getQualifierLoc()),
Importer.Import(E->getTemplateKeywordLoc()),
ToD,
E->refersToEnclosingVariableOrCapture(),
Importer.Import(E->getLocation()),
T, E->getValueKind(),
FoundD, ResInfo);
if (E->hadMultipleCandidates())
DRE->setHadMultipleCandidates(true);
return DRE;
}
Expr *ASTNodeImporter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) ImplicitValueInitExpr(T);
}
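// Import a single designator of a designated initializer. A field designator
// carries the field name plus dot and field locations; array and array-range
// designators carry the index of their first expression plus bracket (and,
// for ranges, ellipsis) locations.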
ASTNodeImporter::Designator
ASTNodeImporter::ImportDesignator(const Designator &D) {
if (D.isFieldDesignator()) {
IdentifierInfo *ToFieldName = Importer.Import(D.getFieldName());
// The caller checks for import errors.
return Designator(ToFieldName, Importer.Import(D.getDotLoc()),
Importer.Import(D.getFieldLoc()));
}
if (D.isArrayDesignator())
return Designator(D.getFirstExprIndex(),
Importer.Import(D.getLBracketLoc()),
Importer.Import(D.getRBracketLoc()));
assert(D.isArrayRangeDesignator());
return Designator(D.getFirstExprIndex(),
Importer.Import(D.getLBracketLoc()),
Importer.Import(D.getEllipsisLoc()),
Importer.Import(D.getRBracketLoc()));
}
Expr *ASTNodeImporter::VisitDesignatedInitExpr(DesignatedInitExpr *DIE) {
Expr *Init = cast_or_null<Expr>(Importer.Import(DIE->getInit()));
if (!Init)
return nullptr;
SmallVector<Expr *, 4> IndexExprs(DIE->getNumSubExprs() - 1);
// Copy the list elements starting from the second; the first sub-expression
// is Init itself, imported above.
for (unsigned I = 1, E = DIE->getNumSubExprs(); I < E; I++) {
if (Expr *Arg = cast_or_null<Expr>(Importer.Import(DIE->getSubExpr(I))))
IndexExprs[I - 1] = Arg;
else
return nullptr;
}
SmallVector<Designator, 4> Designators(DIE->size());
llvm::transform(DIE->designators(), Designators.begin(),
[this](const Designator &D) -> Designator {
return ImportDesignator(D);
});
for (const Designator &D : DIE->designators())
if (D.isFieldDesignator() && !D.getFieldName())
return nullptr;
return DesignatedInitExpr::Create(
Importer.getToContext(), Designators,
IndexExprs, Importer.Import(DIE->getEqualOrColonLoc()),
DIE->usesGNUSyntax(), Init);
}
Expr *ASTNodeImporter::VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXNullPtrLiteralExpr(T, Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitIntegerLiteral(IntegerLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return IntegerLiteral::Create(Importer.getToContext(),
E->getValue(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitFloatingLiteral(FloatingLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return FloatingLiteral::Create(Importer.getToContext(),
E->getValue(), E->isExact(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
E->getKind(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitStringLiteral(StringLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
SmallVector<SourceLocation, 4> Locations(E->getNumConcatenated());
ImportArray(E->tokloc_begin(), E->tokloc_end(), Locations.begin());
return StringLiteral::Create(Importer.getToContext(), E->getBytes(),
E->getKind(), E->isPascal(), T,
Locations.data(), Locations.size());
}
Expr *ASTNodeImporter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getTypeSourceInfo());
if (!TInfo)
return nullptr;
Expr *Init = Importer.Import(E->getInitializer());
if (!Init)
return nullptr;
return new (Importer.getToContext()) CompoundLiteralExpr(
Importer.Import(E->getLParenLoc()), TInfo, T, E->getValueKind(),
Init, E->isFileScope());
}
Expr *ASTNodeImporter::VisitAtomicExpr(AtomicExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 6> Exprs(E->getNumSubExprs());
if (ImportArrayChecked(
E->getSubExprs(), E->getSubExprs() + E->getNumSubExprs(),
Exprs.begin()))
return nullptr;
return new (Importer.getToContext()) AtomicExpr(
Importer.Import(E->getBuiltinLoc()), Exprs, T, E->getOp(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitAddrLabelExpr(AddrLabelExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
LabelDecl *ToLabel = cast_or_null<LabelDecl>(Importer.Import(E->getLabel()));
if (!ToLabel)
return nullptr;
return new (Importer.getToContext()) AddrLabelExpr(
Importer.Import(E->getAmpAmpLoc()), Importer.Import(E->getLabelLoc()),
ToLabel, T);
}
Expr *ASTNodeImporter::VisitParenExpr(ParenExpr *E) {
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext())
ParenExpr(Importer.Import(E->getLParen()),
Importer.Import(E->getRParen()),
SubExpr);
}
Expr *ASTNodeImporter::VisitParenListExpr(ParenListExpr *E) {
SmallVector<Expr *, 4> Exprs(E->getNumExprs());
if (ImportContainerChecked(E->exprs(), Exprs))
return nullptr;
return new (Importer.getToContext()) ParenListExpr(
Importer.getToContext(), Importer.Import(E->getLParenLoc()),
Exprs, Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitStmtExpr(StmtExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
CompoundStmt *ToSubStmt = cast_or_null<CompoundStmt>(
Importer.Import(E->getSubStmt()));
if (!ToSubStmt && E->getSubStmt())
return nullptr;
return new (Importer.getToContext()) StmtExpr(ToSubStmt, T,
Importer.Import(E->getLParenLoc()), Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitUnaryOperator(UnaryOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext()) UnaryOperator(SubExpr, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
Importer.Import(E->getOperatorLoc()));
}
Expr *ASTNodeImporter::VisitUnaryExprOrTypeTraitExpr(
UnaryExprOrTypeTraitExpr *E) {
QualType ResultType = Importer.Import(E->getType());
if (E->isArgumentType()) {
TypeSourceInfo *TInfo = Importer.Import(E->getArgumentTypeInfo());
if (!TInfo)
return nullptr;
return new (Importer.getToContext()) UnaryExprOrTypeTraitExpr(E->getKind(),
TInfo, ResultType,
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getRParenLoc()));
}
Expr *SubExpr = Importer.Import(E->getArgumentExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext()) UnaryExprOrTypeTraitExpr(E->getKind(),
SubExpr, ResultType,
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitBinaryOperator(BinaryOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *LHS = Importer.Import(E->getLHS());
if (!LHS)
return nullptr;
Expr *RHS = Importer.Import(E->getRHS());
if (!RHS)
return nullptr;
return new (Importer.getToContext()) BinaryOperator(LHS, RHS, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
Importer.Import(E->getOperatorLoc()),
E->getFPFeatures());
}
Expr *ASTNodeImporter::VisitConditionalOperator(ConditionalOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToLHS = Importer.Import(E->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(E->getRHS());
if (!ToRHS)
return nullptr;
Expr *ToCond = Importer.Import(E->getCond());
if (!ToCond)
return nullptr;
return new (Importer.getToContext()) ConditionalOperator(
ToCond, Importer.Import(E->getQuestionLoc()),
ToLHS, Importer.Import(E->getColonLoc()),
ToRHS, T, E->getValueKind(), E->getObjectKind());
}
Expr *ASTNodeImporter::VisitBinaryConditionalOperator(
BinaryConditionalOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *Common = Importer.Import(E->getCommon());
if (!Common)
return nullptr;
Expr *Cond = Importer.Import(E->getCond());
if (!Cond)
return nullptr;
OpaqueValueExpr *OpaqueValue = cast_or_null<OpaqueValueExpr>(
Importer.Import(E->getOpaqueValue()));
if (!OpaqueValue)
return nullptr;
Expr *TrueExpr = Importer.Import(E->getTrueExpr());
if (!TrueExpr)
return nullptr;
Expr *FalseExpr = Importer.Import(E->getFalseExpr());
if (!FalseExpr)
return nullptr;
return new (Importer.getToContext()) BinaryConditionalOperator(
Common, OpaqueValue, Cond, TrueExpr, FalseExpr,
Importer.Import(E->getQuestionLoc()), Importer.Import(E->getColonLoc()),
T, E->getValueKind(), E->getObjectKind());
}
Expr *ASTNodeImporter::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *ToQueried = Importer.Import(E->getQueriedTypeSourceInfo());
if (!ToQueried)
return nullptr;
Expr *Dim = Importer.Import(E->getDimensionExpression());
if (!Dim && E->getDimensionExpression())
return nullptr;
return new (Importer.getToContext()) ArrayTypeTraitExpr(
Importer.Import(E->getLocStart()), E->getTrait(), ToQueried,
E->getValue(), Dim, Importer.Import(E->getLocEnd()), T);
}
Expr *ASTNodeImporter::VisitExpressionTraitExpr(ExpressionTraitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToQueried = Importer.Import(E->getQueriedExpression());
if (!ToQueried)
return nullptr;
return new (Importer.getToContext()) ExpressionTraitExpr(
Importer.Import(E->getLocStart()), E->getTrait(), ToQueried,
E->getValue(), Importer.Import(E->getLocEnd()), T);
}
Expr *ASTNodeImporter::VisitOpaqueValueExpr(OpaqueValueExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SourceExpr = Importer.Import(E->getSourceExpr());
if (!SourceExpr && E->getSourceExpr())
return nullptr;
return new (Importer.getToContext()) OpaqueValueExpr(
Importer.Import(E->getLocation()), T, E->getValueKind(),
E->getObjectKind(), SourceExpr);
}
Expr *ASTNodeImporter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToLHS = Importer.Import(E->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(E->getRHS());
if (!ToRHS)
return nullptr;
return new (Importer.getToContext()) ArraySubscriptExpr(
ToLHS, ToRHS, T, E->getValueKind(), E->getObjectKind(),
Importer.Import(E->getRBracketLoc()));
}
Expr *ASTNodeImporter::VisitCompoundAssignOperator(CompoundAssignOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
QualType CompLHSType = Importer.Import(E->getComputationLHSType());
if (CompLHSType.isNull())
return nullptr;
QualType CompResultType = Importer.Import(E->getComputationResultType());
if (CompResultType.isNull())
return nullptr;
Expr *LHS = Importer.Import(E->getLHS());
if (!LHS)
return nullptr;
Expr *RHS = Importer.Import(E->getRHS());
if (!RHS)
return nullptr;
return new (Importer.getToContext())
CompoundAssignOperator(LHS, RHS, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
CompLHSType, CompResultType,
Importer.Import(E->getOperatorLoc()),
E->getFPFeatures());
}
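// Import the chain of CXXBaseSpecifiers that a cast traverses. Returns true
// on error, following the convention of the other Import* helpers that
// signal failure through a bool result.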
bool ASTNodeImporter::ImportCastPath(CastExpr *CE, CXXCastPath &Path) {
for (auto I = CE->path_begin(), E = CE->path_end(); I != E; ++I) {
if (CXXBaseSpecifier *Spec = Importer.Import(*I))
Path.push_back(Spec);
else
return true;
}
return false;
}
Expr *ASTNodeImporter::VisitImplicitCastExpr(ImplicitCastExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
return ImplicitCastExpr::Create(Importer.getToContext(), T, E->getCastKind(),
SubExpr, &BasePath, E->getValueKind());
}
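// Import an explicit cast. The node to create depends on the concrete
// statement class: C-style, functional, and Objective-C bridged casts are
// built directly in the first switch; the C++ named casts share the operator
// and bracket locations imported after it.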
Expr *ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getTypeInfoAsWritten());
if (!TInfo && E->getTypeInfoAsWritten())
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
switch (E->getStmtClass()) {
case Stmt::CStyleCastExprClass: {
CStyleCastExpr *CCE = cast<CStyleCastExpr>(E);
return CStyleCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
Importer.Import(CCE->getLParenLoc()),
Importer.Import(CCE->getRParenLoc()));
}
case Stmt::CXXFunctionalCastExprClass: {
CXXFunctionalCastExpr *FCE = cast<CXXFunctionalCastExpr>(E);
return CXXFunctionalCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), TInfo,
E->getCastKind(), SubExpr, &BasePath,
Importer.Import(FCE->getLParenLoc()),
Importer.Import(FCE->getRParenLoc()));
}
case Stmt::ObjCBridgedCastExprClass: {
ObjCBridgedCastExpr *OCE = cast<ObjCBridgedCastExpr>(E);
return new (Importer.getToContext()) ObjCBridgedCastExpr(
Importer.Import(OCE->getLParenLoc()), OCE->getBridgeKind(),
E->getCastKind(), Importer.Import(OCE->getBridgeKeywordLoc()),
TInfo, SubExpr);
}
default:
break; // Handled below as a C++ named cast.
}
CXXNamedCastExpr *Named = cast<CXXNamedCastExpr>(E);
SourceLocation ExprLoc = Importer.Import(Named->getOperatorLoc()),
RParenLoc = Importer.Import(Named->getRParenLoc());
SourceRange Brackets = Importer.Import(Named->getAngleBrackets());
switch (E->getStmtClass()) {
case Stmt::CXXStaticCastExprClass:
return CXXStaticCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXDynamicCastExprClass:
return CXXDynamicCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXReinterpretCastExprClass:
return CXXReinterpretCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXConstCastExprClass:
return CXXConstCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), SubExpr, TInfo, ExprLoc,
RParenLoc, Brackets);
default:
llvm_unreachable("Cast expression of unsupported type!");
return nullptr;
}
}
Expr *ASTNodeImporter::VisitOffsetOfExpr(OffsetOfExpr *OE) {
QualType T = Importer.Import(OE->getType());
if (T.isNull())
return nullptr;
SmallVector<OffsetOfNode, 4> Nodes;
for (int I = 0, E = OE->getNumComponents(); I < E; ++I) {
const OffsetOfNode &Node = OE->getComponent(I);
switch (Node.getKind()) {
case OffsetOfNode::Array:
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()),
Node.getArrayExprIndex(),
Importer.Import(Node.getLocEnd())));
break;
case OffsetOfNode::Base: {
CXXBaseSpecifier *BS = Importer.Import(Node.getBase());
if (!BS && Node.getBase())
return nullptr;
Nodes.push_back(OffsetOfNode(BS));
break;
}
case OffsetOfNode::Field: {
FieldDecl *FD = cast_or_null<FieldDecl>(Importer.Import(Node.getField()));
if (!FD)
return nullptr;
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()), FD,
Importer.Import(Node.getLocEnd())));
break;
}
case OffsetOfNode::Identifier: {
IdentifierInfo *ToII = Importer.Import(Node.getFieldName());
if (!ToII)
return nullptr;
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()), ToII,
Importer.Import(Node.getLocEnd())));
break;
}
}
}
SmallVector<Expr *, 4> Exprs(OE->getNumExpressions());
for (int I = 0, E = OE->getNumExpressions(); I < E; ++I) {
Expr *ToIndexExpr = Importer.Import(OE->getIndexExpr(I));
if (!ToIndexExpr)
return nullptr;
Exprs[I] = ToIndexExpr;
}
TypeSourceInfo *TInfo = Importer.Import(OE->getTypeSourceInfo());
if (!TInfo && OE->getTypeSourceInfo())
return nullptr;
return OffsetOfExpr::Create(Importer.getToContext(), T,
Importer.Import(OE->getOperatorLoc()),
TInfo, Nodes, Exprs,
Importer.Import(OE->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitCXXNoexceptExpr(CXXNoexceptExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *Operand = Importer.Import(E->getOperand());
if (!Operand)
return nullptr;
CanThrowResult CanThrow;
if (E->isValueDependent())
CanThrow = CT_Dependent;
else
CanThrow = E->getValue() ? CT_Can : CT_Cannot;
return new (Importer.getToContext()) CXXNoexceptExpr(
T, Operand, CanThrow,
Importer.Import(E->getLocStart()), Importer.Import(E->getLocEnd()));
}
Expr *ASTNodeImporter::VisitCXXThrowExpr(CXXThrowExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr && E->getSubExpr())
return nullptr;
return new (Importer.getToContext()) CXXThrowExpr(
SubExpr, T, Importer.Import(E->getThrowLoc()),
E->isThrownVariableInScope());
}
Expr *ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
ParmVarDecl *Param = cast_or_null<ParmVarDecl>(
Importer.Import(E->getParam()));
if (!Param)
return nullptr;
return CXXDefaultArgExpr::Create(
Importer.getToContext(), Importer.Import(E->getUsedLocation()), Param);
}
Expr *ASTNodeImporter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TypeInfo = Importer.Import(E->getTypeSourceInfo());
if (!TypeInfo)
return nullptr;
return new (Importer.getToContext()) CXXScalarValueInitExpr(
T, TypeInfo, Importer.Import(E->getRParenLoc()));
}
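// Import a bind-temporary expression. The temporary's destructor is imported
// first so that an equivalent CXXTemporary can be created in the destination
// context.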
Expr *ASTNodeImporter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
auto *Dtor = cast_or_null<CXXDestructorDecl>(
Importer.Import(const_cast<CXXDestructorDecl *>(
E->getTemporary()->getDestructor())));
if (!Dtor)
return nullptr;
ASTContext &ToCtx = Importer.getToContext();
CXXTemporary *Temp = CXXTemporary::Create(ToCtx, Dtor);
return CXXBindTemporaryExpr::Create(ToCtx, Temp, SubExpr);
}
Expr *ASTNodeImporter::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *CE) {
QualType T = Importer.Import(CE->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 8> Args(CE->getNumArgs());
if (ImportContainerChecked(CE->arguments(), Args))
return nullptr;
auto *Ctor = cast_or_null<CXXConstructorDecl>(
Importer.Import(CE->getConstructor()));
if (!Ctor)
return nullptr;
return CXXTemporaryObjectExpr::Create(
Importer.getToContext(), T,
Importer.Import(CE->getLocStart()),
Ctor,
CE->isElidable(),
Args,
CE->hadMultipleCandidates(),
CE->isListInitialization(),
CE->isStdInitListInitialization(),
CE->requiresZeroInitialization(),
CE->getConstructionKind(),
Importer.Import(CE->getParenOrBraceRange()));
}
Expr *
ASTNodeImporter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *TempE = Importer.Import(E->GetTemporaryExpr());
if (!TempE)
return nullptr;
ValueDecl *ExtendedBy = cast_or_null<ValueDecl>(
Importer.Import(const_cast<ValueDecl *>(E->getExtendingDecl())));
if (!ExtendedBy && E->getExtendingDecl())
return nullptr;
auto *ToMTE = new (Importer.getToContext()) MaterializeTemporaryExpr(
T, TempE, E->isBoundToLvalueReference());
// FIXME: Should ManglingNumber get numbers associated with 'to' context?
ToMTE->setExtendingDecl(ExtendedBy, E->getManglingNumber());
return ToMTE;
}
Expr *ASTNodeImporter::VisitCXXNewExpr(CXXNewExpr *CE) {
QualType T = Importer.Import(CE->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 4> PlacementArgs(CE->getNumPlacementArgs());
if (ImportContainerChecked(CE->placement_arguments(), PlacementArgs))
return nullptr;
FunctionDecl *OperatorNewDecl = cast_or_null<FunctionDecl>(
Importer.Import(CE->getOperatorNew()));
if (!OperatorNewDecl && CE->getOperatorNew())
return nullptr;
FunctionDecl *OperatorDeleteDecl = cast_or_null<FunctionDecl>(
Importer.Import(CE->getOperatorDelete()));
if (!OperatorDeleteDecl && CE->getOperatorDelete())
return nullptr;
Expr *ToInit = Importer.Import(CE->getInitializer());
if (!ToInit && CE->getInitializer())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(CE->getAllocatedTypeSourceInfo());
if (!TInfo)
return nullptr;
Expr *ToArrSize = Importer.Import(CE->getArraySize());
if (!ToArrSize && CE->getArraySize())
return nullptr;
return new (Importer.getToContext()) CXXNewExpr(
Importer.getToContext(),
CE->isGlobalNew(),
OperatorNewDecl, OperatorDeleteDecl,
CE->passAlignment(),
CE->doesUsualArrayDeleteWantSize(),
PlacementArgs,
Importer.Import(CE->getTypeIdParens()),
ToArrSize, CE->getInitializationStyle(), ToInit, T, TInfo,
Importer.Import(CE->getSourceRange()),
Importer.Import(CE->getDirectInitRange()));
}
Expr *ASTNodeImporter::VisitCXXDeleteExpr(CXXDeleteExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
FunctionDecl *OperatorDeleteDecl = cast_or_null<FunctionDecl>(
Importer.Import(E->getOperatorDelete()));
if (!OperatorDeleteDecl && E->getOperatorDelete())
return nullptr;
Expr *ToArg = Importer.Import(E->getArgument());
if (!ToArg && E->getArgument())
return nullptr;
return new (Importer.getToContext()) CXXDeleteExpr(
T, E->isGlobalDelete(),
E->isArrayForm(),
E->isArrayFormAsWritten(),
E->doesUsualArrayDeleteWantSize(),
OperatorDeleteDecl,
ToArg,
Importer.Import(E->getLocStart()));
}
Expr *ASTNodeImporter::VisitCXXConstructExpr(CXXConstructExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
CXXConstructorDecl *ToCCD =
dyn_cast_or_null<CXXConstructorDecl>(Importer.Import(E->getConstructor()));
if (!ToCCD)
return nullptr;
SmallVector<Expr *, 6> ToArgs(E->getNumArgs());
if (ImportContainerChecked(E->arguments(), ToArgs))
return nullptr;
return CXXConstructExpr::Create(Importer.getToContext(), T,
Importer.Import(E->getLocation()),
ToCCD, E->isElidable(),
ToArgs, E->hadMultipleCandidates(),
E->isListInitialization(),
E->isStdInitListInitialization(),
E->requiresZeroInitialization(),
E->getConstructionKind(),
Importer.Import(E->getParenOrBraceRange()));
}
Expr *ASTNodeImporter::VisitExprWithCleanups(ExprWithCleanups *EWC) {
Expr *SubExpr = Importer.Import(EWC->getSubExpr());
if (!SubExpr && EWC->getSubExpr())
return nullptr;
SmallVector<ExprWithCleanups::CleanupObject, 8> Objs(EWC->getNumObjects());
for (unsigned I = 0, E = EWC->getNumObjects(); I < E; I++)
if (ExprWithCleanups::CleanupObject Obj =
cast_or_null<BlockDecl>(Importer.Import(EWC->getObject(I))))
Objs[I] = Obj;
else
return nullptr;
return ExprWithCleanups::Create(Importer.getToContext(),
SubExpr, EWC->cleanupsHaveSideEffects(),
Objs);
}
Expr *ASTNodeImporter::VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToFn = Importer.Import(E->getCallee());
if (!ToFn)
return nullptr;
SmallVector<Expr *, 4> ToArgs(E->getNumArgs());
if (ImportContainerChecked(E->arguments(), ToArgs))
return nullptr;
return new (Importer.getToContext()) CXXMemberCallExpr(
Importer.getToContext(), ToFn, ToArgs, T, E->getValueKind(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitCXXThisExpr(CXXThisExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXThisExpr(Importer.Import(E->getLocation()), T, E->isImplicit());
}
Expr *ASTNodeImporter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXBoolLiteralExpr(E->getValue(), T, Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitMemberExpr(MemberExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToBase = Importer.Import(E->getBase());
if (!ToBase && E->getBase())
return nullptr;
ValueDecl *ToMember = dyn_cast_or_null<ValueDecl>(Importer.Import(E->getMemberDecl()));
if (!ToMember && E->getMemberDecl())
return nullptr;
DeclAccessPair ToFoundDecl = DeclAccessPair::make(
dyn_cast_or_null<NamedDecl>(Importer.Import(E->getFoundDecl().getDecl())),
E->getFoundDecl().getAccess());
DeclarationNameInfo ToMemberNameInfo(
Importer.Import(E->getMemberNameInfo().getName()),
Importer.Import(E->getMemberNameInfo().getLoc()));
if (E->hasExplicitTemplateArgs()) {
return nullptr; // FIXME: handle template arguments
}
return MemberExpr::Create(Importer.getToContext(), ToBase,
E->isArrow(),
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getQualifierLoc()),
Importer.Import(E->getTemplateKeywordLoc()),
ToMember, ToFoundDecl, ToMemberNameInfo,
nullptr, T, E->getValueKind(),
E->getObjectKind());
}
Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToCallee = Importer.Import(E->getCallee());
if (!ToCallee && E->getCallee())
return nullptr;
unsigned NumArgs = E->getNumArgs();
llvm::SmallVector<Expr *, 2> ToArgs(NumArgs);
for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai) {
Expr *FromArg = E->getArg(ai);
Expr *ToArg = Importer.Import(FromArg);
if (!ToArg)
return nullptr;
ToArgs[ai] = ToArg;
}
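// Stage the imported argument pointers in an array allocated within the
// destination ASTContext before constructing the CallExpr.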
Expr **ToArgs_Copied = new (Importer.getToContext())
Expr*[NumArgs];
for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai)
ToArgs_Copied[ai] = ToArgs[ai];
return new (Importer.getToContext())
CallExpr(Importer.getToContext(), ToCallee,
llvm::makeArrayRef(ToArgs_Copied, NumArgs), T, E->getValueKind(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitInitListExpr(InitListExpr *ILE) {
QualType T = Importer.Import(ILE->getType());
if (T.isNull())
return nullptr;
llvm::SmallVector<Expr *, 4> Exprs(ILE->getNumInits());
if (ImportContainerChecked(ILE->inits(), Exprs))
return nullptr;
ASTContext &ToCtx = Importer.getToContext();
InitListExpr *To = new (ToCtx) InitListExpr(
ToCtx, Importer.Import(ILE->getLBraceLoc()),
Exprs, Importer.Import(ILE->getRBraceLoc()));
To->setType(T);
if (ILE->hasArrayFiller()) {
Expr *Filler = Importer.Import(ILE->getArrayFiller());
if (!Filler)
return nullptr;
To->setArrayFiller(Filler);
}
if (FieldDecl *FromFD = ILE->getInitializedFieldInUnion()) {
FieldDecl *ToFD = cast_or_null<FieldDecl>(Importer.Import(FromFD));
if (!ToFD)
return nullptr;
To->setInitializedFieldInUnion(ToFD);
}
if (InitListExpr *SyntForm = ILE->getSyntacticForm()) {
InitListExpr *ToSyntForm = cast_or_null<InitListExpr>(
Importer.Import(SyntForm));
if (!ToSyntForm)
return nullptr;
To->setSyntacticForm(ToSyntForm);
}
To->sawArrayRangeDesignator(ILE->hadArrayRangeDesignator());
To->setValueDependent(ILE->isValueDependent());
To->setInstantiationDependent(ILE->isInstantiationDependent());
return To;
}
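// Illustrative note (not part of the upstream file): an InitListExpr can
// carry both a semantic and a syntactic form. For example,
//
//   int a[4] = {1, 2};
//
// gains an array filler for the trailing elements in its semantic form. The
// code above therefore imports the filler, the initialized union field, and
// the syntactic form separately, so both views of the initializer survive.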
Expr *ASTNodeImporter::VisitArrayInitLoopExpr(ArrayInitLoopExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull())
return nullptr;
Expr *ToCommon = Importer.Import(E->getCommonExpr());
if (!ToCommon && E->getCommonExpr())
return nullptr;
Expr *ToSubExpr = Importer.Import(E->getSubExpr());
if (!ToSubExpr && E->getSubExpr())
return nullptr;
return new (Importer.getToContext())
ArrayInitLoopExpr(ToType, ToCommon, ToSubExpr);
}
Expr *ASTNodeImporter::VisitArrayInitIndexExpr(ArrayInitIndexExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull())
return nullptr;
return new (Importer.getToContext()) ArrayInitIndexExpr(ToType);
}
Expr *ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) {
FieldDecl *ToField = llvm::dyn_cast_or_null<FieldDecl>(
Importer.Import(DIE->getField()));
if (!ToField && DIE->getField())
return nullptr;
return CXXDefaultInitExpr::Create(
Importer.getToContext(), Importer.Import(DIE->getLocStart()), ToField);
}
Expr *ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull() && !E->getType().isNull())
return nullptr;
ExprValueKind VK = E->getValueKind();
CastKind CK = E->getCastKind();
Expr *ToOp = Importer.Import(E->getSubExpr());
if (!ToOp && E->getSubExpr())
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
TypeSourceInfo *ToWritten = Importer.Import(E->getTypeInfoAsWritten());
SourceLocation ToOperatorLoc = Importer.Import(E->getOperatorLoc());
SourceLocation ToRParenLoc = Importer.Import(E->getRParenLoc());
SourceRange ToAngleBrackets = Importer.Import(E->getAngleBrackets());
if (isa<CXXStaticCastExpr>(E)) {
return CXXStaticCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else if (isa<CXXDynamicCastExpr>(E)) {
return CXXDynamicCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else if (isa<CXXReinterpretCastExpr>(E)) {
return CXXReinterpretCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else {
return nullptr;
}
}
Expr *ASTNodeImporter::VisitSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
NonTypeTemplateParmDecl *Param = cast_or_null<NonTypeTemplateParmDecl>(
Importer.Import(E->getParameter()));
if (!Param)
return nullptr;
Expr *Replacement = Importer.Import(E->getReplacement());
if (!Replacement)
return nullptr;
return new (Importer.getToContext()) SubstNonTypeTemplateParmExpr(
T, E->getValueKind(), Importer.Import(E->getExprLoc()), Param,
Replacement);
}
void ASTNodeImporter::ImportOverrides(CXXMethodDecl *ToMethod,
CXXMethodDecl *FromMethod) {
for (auto *FromOverriddenMethod : FromMethod->overridden_methods())
ToMethod->addOverriddenMethod(
cast<CXXMethodDecl>(Importer.Import(const_cast<CXXMethodDecl*>(
FromOverriddenMethod))));
}
ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager,
ASTContext &FromContext, FileManager &FromFileManager,
bool MinimalImport)
: ToContext(ToContext), FromContext(FromContext),
ToFileManager(ToFileManager), FromFileManager(FromFileManager),
Minimal(MinimalImport), LastDiagFromFrom(false)
{
ImportedDecls[FromContext.getTranslationUnitDecl()]
= ToContext.getTranslationUnitDecl();
}
ASTImporter::~ASTImporter() { }
QualType ASTImporter::Import(QualType FromT) {
if (FromT.isNull())
return QualType();
const Type *fromTy = FromT.getTypePtr();
// Check whether we've already imported this type.
llvm::DenseMap<const Type *, const Type *>::iterator Pos
= ImportedTypes.find(fromTy);
if (Pos != ImportedTypes.end())
return ToContext.getQualifiedType(Pos->second, FromT.getLocalQualifiers());
// Import the type
ASTNodeImporter Importer(*this);
QualType ToT = Importer.Visit(fromTy);
if (ToT.isNull())
return ToT;
// Record the imported type.
ImportedTypes[fromTy] = ToT.getTypePtr();
return ToContext.getQualifiedType(ToT, FromT.getLocalQualifiers());
}
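// Illustrative sketch (hypothetical variables, not part of the upstream
// file): ImportedTypes memoizes per unqualified Type*, and local qualifiers
// are re-applied on every call, so
//
//   QualType A = Importer.Import(FromT);             // performs the import
//   QualType B = Importer.Import(FromT.withConst()); // cache hit, adds const
//
// leaves A and B sharing a single to-context Type* when the import succeeds.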
TypeSourceInfo *ASTImporter::Import(TypeSourceInfo *FromTSI) {
if (!FromTSI)
return FromTSI;
// FIXME: For now we just create a "trivial" type source info based
// on the type and a single location. Implement a real version of this.
QualType T = Import(FromTSI->getType());
if (T.isNull())
return nullptr;
return ToContext.getTrivialTypeSourceInfo(T,
Import(FromTSI->getTypeLoc().getLocStart()));
}
Decl *ASTImporter::GetAlreadyImportedOrNull(Decl *FromD) {
llvm::DenseMap<Decl *, Decl *>::iterator Pos = ImportedDecls.find(FromD);
if (Pos != ImportedDecls.end()) {
Decl *ToD = Pos->second;
ASTNodeImporter(*this).ImportDefinitionIfNeeded(FromD, ToD);
return ToD;
} else {
return nullptr;
}
}
Decl *ASTImporter::Import(Decl *FromD) {
if (!FromD)
return nullptr;
ASTNodeImporter Importer(*this);
// Check whether we've already imported this declaration.
llvm::DenseMap<Decl *, Decl *>::iterator Pos = ImportedDecls.find(FromD);
if (Pos != ImportedDecls.end()) {
Decl *ToD = Pos->second;
Importer.ImportDefinitionIfNeeded(FromD, ToD);
return ToD;
}
// Import the declaration.
Decl *ToD = Importer.Visit(FromD);
if (!ToD)
return nullptr;
// Record the imported declaration.
ImportedDecls[FromD] = ToD;
if (TagDecl *FromTag = dyn_cast<TagDecl>(FromD)) {
// Keep track of anonymous tags that have an associated typedef.
if (FromTag->getTypedefNameForAnonDecl())
AnonTagsWithPendingTypedefs.push_back(FromTag);
} else if (TypedefNameDecl *FromTypedef = dyn_cast<TypedefNameDecl>(FromD)) {
// When we've finished transforming a typedef, see whether it was the
// typedef for an anonymous tag.
for (SmallVectorImpl<TagDecl *>::iterator
FromTag = AnonTagsWithPendingTypedefs.begin(),
FromTagEnd = AnonTagsWithPendingTypedefs.end();
FromTag != FromTagEnd; ++FromTag) {
if ((*FromTag)->getTypedefNameForAnonDecl() == FromTypedef) {
if (TagDecl *ToTag = cast_or_null<TagDecl>(Import(*FromTag))) {
// We found the typedef for an anonymous tag; link them.
ToTag->setTypedefNameForAnonDecl(cast<TypedefNameDecl>(ToD));
AnonTagsWithPendingTypedefs.erase(FromTag);
break;
}
}
}
}
return ToD;
}
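// Illustrative note: the AnonTagsWithPendingTypedefs bookkeeping above
// exists for declarations such as
//
//   typedef struct { int x; } T;
//
// where the anonymous tag is only reachable through its typedef. The tag is
// imported first and parked; once the TypedefNameDecl arrives, the loop
// re-links the imported pair via setTypedefNameForAnonDecl.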
DeclContext *ASTImporter::ImportContext(DeclContext *FromDC) {
if (!FromDC)
return FromDC;
DeclContext *ToDC = cast_or_null<DeclContext>(Import(cast<Decl>(FromDC)));
if (!ToDC)
return nullptr;
// When we're using a record/enum/Objective-C class/protocol as a context, we
// need it to have a definition.
if (RecordDecl *ToRecord = dyn_cast<RecordDecl>(ToDC)) {
RecordDecl *FromRecord = cast<RecordDecl>(FromDC);
if (ToRecord->isCompleteDefinition()) {
// Do nothing.
} else if (FromRecord->isCompleteDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromRecord, ToRecord,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToRecord);
}
} else if (EnumDecl *ToEnum = dyn_cast<EnumDecl>(ToDC)) {
EnumDecl *FromEnum = cast<EnumDecl>(FromDC);
if (ToEnum->isCompleteDefinition()) {
// Do nothing.
} else if (FromEnum->isCompleteDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromEnum, ToEnum,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToEnum);
}
} else if (ObjCInterfaceDecl *ToClass = dyn_cast<ObjCInterfaceDecl>(ToDC)) {
ObjCInterfaceDecl *FromClass = cast<ObjCInterfaceDecl>(FromDC);
if (ToClass->getDefinition()) {
// Do nothing.
} else if (ObjCInterfaceDecl *FromDef = FromClass->getDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromDef, ToClass,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToClass);
}
} else if (ObjCProtocolDecl *ToProto = dyn_cast<ObjCProtocolDecl>(ToDC)) {
ObjCProtocolDecl *FromProto = cast<ObjCProtocolDecl>(FromDC);
if (ToProto->getDefinition()) {
// Do nothing.
} else if (ObjCProtocolDecl *FromDef = FromProto->getDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromDef, ToProto,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToProto);
}
}
return ToDC;
}
Expr *ASTImporter::Import(Expr *FromE) {
if (!FromE)
return nullptr;
return cast_or_null<Expr>(Import(cast<Stmt>(FromE)));
}
Stmt *ASTImporter::Import(Stmt *FromS) {
if (!FromS)
return nullptr;
// Check whether we've already imported this statement.
llvm::DenseMap<Stmt *, Stmt *>::iterator Pos = ImportedStmts.find(FromS);
if (Pos != ImportedStmts.end())
return Pos->second;
// Import the statement.
ASTNodeImporter Importer(*this);
Stmt *ToS = Importer.Visit(FromS);
if (!ToS)
return nullptr;
// Record the imported statement.
ImportedStmts[FromS] = ToS;
return ToS;
}
NestedNameSpecifier *ASTImporter::Import(NestedNameSpecifier *FromNNS) {
if (!FromNNS)
return nullptr;
NestedNameSpecifier *prefix = Import(FromNNS->getPrefix());
switch (FromNNS->getKind()) {
case NestedNameSpecifier::Identifier:
if (IdentifierInfo *II = Import(FromNNS->getAsIdentifier())) {
return NestedNameSpecifier::Create(ToContext, prefix, II);
}
return nullptr;
case NestedNameSpecifier::Namespace:
if (NamespaceDecl *NS =
cast_or_null<NamespaceDecl>(Import(FromNNS->getAsNamespace()))) {
return NestedNameSpecifier::Create(ToContext, prefix, NS);
}
return nullptr;
case NestedNameSpecifier::NamespaceAlias:
if (NamespaceAliasDecl *NSAD =
cast_or_null<NamespaceAliasDecl>(Import(FromNNS->getAsNamespaceAlias()))) {
return NestedNameSpecifier::Create(ToContext, prefix, NSAD);
}
return nullptr;
case NestedNameSpecifier::Global:
return NestedNameSpecifier::GlobalSpecifier(ToContext);
case NestedNameSpecifier::Super:
if (CXXRecordDecl *RD =
cast_or_null<CXXRecordDecl>(Import(FromNNS->getAsRecordDecl()))) {
return NestedNameSpecifier::SuperSpecifier(ToContext, RD);
}
return nullptr;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
QualType T = Import(QualType(FromNNS->getAsType(), 0u));
if (!T.isNull()) {
bool bTemplate = FromNNS->getKind() ==
NestedNameSpecifier::TypeSpecWithTemplate;
return NestedNameSpecifier::Create(ToContext, prefix,
bTemplate, T.getTypePtr());
}
}
return nullptr;
}
llvm_unreachable("Invalid nested name specifier kind");
}
NestedNameSpecifierLoc ASTImporter::Import(NestedNameSpecifierLoc FromNNS) {
// Mostly copied from the NestedNameSpecifier overload above.
SmallVector<NestedNameSpecifierLoc , 8> NestedNames;
NestedNameSpecifierLoc NNS = FromNNS;
// Push each of the nested-name-specifiers onto a stack so they can be
// rebuilt in reverse (outermost-first) order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS.getPrefix();
}
NestedNameSpecifierLocBuilder Builder;
while (!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier *Spec = Import(NNS.getNestedNameSpecifier());
if (!Spec)
return NestedNameSpecifierLoc();
NestedNameSpecifier::SpecifierKind Kind = Spec->getKind();
switch (Kind) {
case NestedNameSpecifier::Identifier:
Builder.Extend(getToContext(),
Spec->getAsIdentifier(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::Namespace:
Builder.Extend(getToContext(),
Spec->getAsNamespace(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::NamespaceAlias:
Builder.Extend(getToContext(),
Spec->getAsNamespaceAlias(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
TypeSourceInfo *TSI = getToContext().getTrivialTypeSourceInfo(
QualType(Spec->getAsType(), 0));
Builder.Extend(getToContext(),
Import(NNS.getLocalBeginLoc()),
TSI->getTypeLoc(),
Import(NNS.getLocalEndLoc()));
break;
}
case NestedNameSpecifier::Global:
Builder.MakeGlobal(getToContext(), Import(NNS.getLocalBeginLoc()));
break;
case NestedNameSpecifier::Super: {
SourceRange ToRange = Import(NNS.getSourceRange());
Builder.MakeSuper(getToContext(),
Spec->getAsRecordDecl(),
ToRange.getBegin(),
ToRange.getEnd());
}
}
}
return Builder.getWithLocInContext(getToContext());
}
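// Illustrative note: for a qualifier such as A::B::C::, the first loop pushes
// the full specifier and then each shorter prefix, so pop_back_val() replays
// them outermost-first (A::, then A::B::, then A::B::C::) while the builder
// imports each component's begin/end locations.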
TemplateName ASTImporter::Import(TemplateName From) {
switch (From.getKind()) {
case TemplateName::Template:
if (TemplateDecl *ToTemplate
= cast_or_null<TemplateDecl>(Import(From.getAsTemplateDecl())))
return TemplateName(ToTemplate);
return TemplateName();
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *FromStorage = From.getAsOverloadedTemplate();
UnresolvedSet<2> ToTemplates;
for (OverloadedTemplateStorage::iterator I = FromStorage->begin(),
E = FromStorage->end();
I != E; ++I) {
if (NamedDecl *To = cast_or_null<NamedDecl>(Import(*I)))
ToTemplates.addDecl(To);
else
return TemplateName();
}
return ToContext.getOverloadedTemplateName(ToTemplates.begin(),
ToTemplates.end());
}
case TemplateName::QualifiedTemplate: {
QualifiedTemplateName *QTN = From.getAsQualifiedTemplateName();
NestedNameSpecifier *Qualifier = Import(QTN->getQualifier());
if (!Qualifier)
return TemplateName();
if (TemplateDecl *ToTemplate
= cast_or_null<TemplateDecl>(Import(From.getAsTemplateDecl())))
return ToContext.getQualifiedTemplateName(Qualifier,
QTN->hasTemplateKeyword(),
ToTemplate);
return TemplateName();
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = From.getAsDependentTemplateName();
NestedNameSpecifier *Qualifier = Import(DTN->getQualifier());
if (!Qualifier)
return TemplateName();
if (DTN->isIdentifier()) {
return ToContext.getDependentTemplateName(Qualifier,
Import(DTN->getIdentifier()));
}
return ToContext.getDependentTemplateName(Qualifier, DTN->getOperator());
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= From.getAsSubstTemplateTemplateParm();
TemplateTemplateParmDecl *param
= cast_or_null<TemplateTemplateParmDecl>(Import(subst->getParameter()));
if (!param)
return TemplateName();
TemplateName replacement = Import(subst->getReplacement());
if (replacement.isNull()) return TemplateName();
return ToContext.getSubstTemplateTemplateParm(param, replacement);
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *SubstPack
= From.getAsSubstTemplateTemplateParmPack();
TemplateTemplateParmDecl *Param
= cast_or_null<TemplateTemplateParmDecl>(
Import(SubstPack->getParameterPack()));
if (!Param)
return TemplateName();
ASTNodeImporter Importer(*this);
TemplateArgument ArgPack
= Importer.ImportTemplateArgument(SubstPack->getArgumentPack());
if (ArgPack.isNull())
return TemplateName();
return ToContext.getSubstTemplateTemplateParmPack(Param, ArgPack);
}
}
llvm_unreachable("Invalid template name kind");
}
SourceLocation ASTImporter::Import(SourceLocation FromLoc) {
if (FromLoc.isInvalid())
return SourceLocation();
SourceManager &FromSM = FromContext.getSourceManager();
// For now, map everything down to its file location, so that we
// don't have to import macro expansions.
// FIXME: Import macro expansions!
FromLoc = FromSM.getFileLoc(FromLoc);
std::pair<FileID, unsigned> Decomposed = FromSM.getDecomposedLoc(FromLoc);
SourceManager &ToSM = ToContext.getSourceManager();
FileID ToFileID = Import(Decomposed.first);
if (ToFileID.isInvalid())
return SourceLocation();
SourceLocation ret = ToSM.getLocForStartOfFile(ToFileID)
.getLocWithOffset(Decomposed.second);
return ret;
}
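// Illustrative note: because the location is first flattened with
// getFileLoc(), a location inside a macro expansion imports as the expansion
// point in the surrounding file; the spelling/expansion structure itself is
// dropped, as the FIXME above records.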
SourceRange ASTImporter::Import(SourceRange FromRange) {
return SourceRange(Import(FromRange.getBegin()), Import(FromRange.getEnd()));
}
FileID ASTImporter::Import(FileID FromID) {
llvm::DenseMap<FileID, FileID>::iterator Pos
= ImportedFileIDs.find(FromID);
if (Pos != ImportedFileIDs.end())
return Pos->second;
SourceManager &FromSM = FromContext.getSourceManager();
SourceManager &ToSM = ToContext.getSourceManager();
const SrcMgr::SLocEntry &FromSLoc = FromSM.getSLocEntry(FromID);
assert(FromSLoc.isFile() && "Cannot handle macro expansions yet");
// Include location of this file.
SourceLocation ToIncludeLoc = Import(FromSLoc.getFile().getIncludeLoc());
// Map the FileID into the "to" source manager.
FileID ToID;
const SrcMgr::ContentCache *Cache = FromSLoc.getFile().getContentCache();
if (Cache->OrigEntry && Cache->OrigEntry->getDir()) {
// FIXME: We probably want to use getVirtualFile(), so we don't hit the
// disk again
// FIXME: We definitely want to re-use the existing MemoryBuffer, rather
// than mmap the files several times.
const FileEntry *Entry = ToFileManager.getFile(Cache->OrigEntry->getName());
if (!Entry)
return FileID();
ToID = ToSM.createFileID(Entry, ToIncludeLoc,
FromSLoc.getFile().getFileCharacteristic());
} else {
// FIXME: We want to re-use the existing MemoryBuffer!
const llvm::MemoryBuffer *FromBuf =
Cache->getBuffer(FromContext.getDiagnostics(), FromSM);
std::unique_ptr<llvm::MemoryBuffer> ToBuf
= llvm::MemoryBuffer::getMemBufferCopy(FromBuf->getBuffer(),
FromBuf->getBufferIdentifier());
ToID = ToSM.createFileID(std::move(ToBuf),
FromSLoc.getFile().getFileCharacteristic());
}
ImportedFileIDs[FromID] = ToID;
return ToID;
}
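// Illustrative note: the two branches above trade fidelity for safety. If the
// original file is still present on disk it is looked up again through the
// "to" FileManager; otherwise the buffer contents are copied wholesale into
// the "to" SourceManager, which is what the FIXMEs about reusing the existing
// MemoryBuffer refer to.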
CXXCtorInitializer *ASTImporter::Import(CXXCtorInitializer *From) {
Expr *ToExpr = Import(From->getInit());
if (!ToExpr && From->getInit())
return nullptr;
if (From->isBaseInitializer()) {
TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
if (!ToTInfo && From->getTypeSourceInfo())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToTInfo, From->isBaseVirtual(), Import(From->getLParenLoc()),
ToExpr, Import(From->getRParenLoc()),
From->isPackExpansion() ? Import(From->getEllipsisLoc())
: SourceLocation());
} else if (From->isMemberInitializer()) {
FieldDecl *ToField =
llvm::cast_or_null<FieldDecl>(Import(From->getMember()));
if (!ToField && From->getMember())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToField, Import(From->getMemberLocation()),
Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
} else if (From->isIndirectMemberInitializer()) {
IndirectFieldDecl *ToIField = llvm::cast_or_null<IndirectFieldDecl>(
Import(From->getIndirectMember()));
if (!ToIField && From->getIndirectMember())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToIField, Import(From->getMemberLocation()),
Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
} else if (From->isDelegatingInitializer()) {
TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
if (!ToTInfo && From->getTypeSourceInfo())
return nullptr;
return new (ToContext)
CXXCtorInitializer(ToContext, ToTInfo, Import(From->getLParenLoc()),
ToExpr, Import(From->getRParenLoc()));
} else {
return nullptr;
}
}
CXXBaseSpecifier *ASTImporter::Import(const CXXBaseSpecifier *BaseSpec) {
auto Pos = ImportedCXXBaseSpecifiers.find(BaseSpec);
if (Pos != ImportedCXXBaseSpecifiers.end())
return Pos->second;
CXXBaseSpecifier *Imported = new (ToContext) CXXBaseSpecifier(
Import(BaseSpec->getSourceRange()),
BaseSpec->isVirtual(), BaseSpec->isBaseOfClass(),
BaseSpec->getAccessSpecifierAsWritten(),
Import(BaseSpec->getTypeSourceInfo()),
Import(BaseSpec->getEllipsisLoc()));
ImportedCXXBaseSpecifiers[BaseSpec] = Imported;
return Imported;
}
void ASTImporter::ImportDefinition(Decl *From) {
Decl *To = Import(From);
if (!To)
return;
if (DeclContext *FromDC = dyn_cast<DeclContext>(From)) {
ASTNodeImporter Importer(*this);
if (RecordDecl *ToRecord = dyn_cast<RecordDecl>(To)) {
if (!ToRecord->getDefinition()) {
Importer.ImportDefinition(cast<RecordDecl>(FromDC), ToRecord,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (EnumDecl *ToEnum = dyn_cast<EnumDecl>(To)) {
if (!ToEnum->getDefinition()) {
Importer.ImportDefinition(cast<EnumDecl>(FromDC), ToEnum,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (ObjCInterfaceDecl *ToIFace = dyn_cast<ObjCInterfaceDecl>(To)) {
if (!ToIFace->getDefinition()) {
Importer.ImportDefinition(cast<ObjCInterfaceDecl>(FromDC), ToIFace,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (ObjCProtocolDecl *ToProto = dyn_cast<ObjCProtocolDecl>(To)) {
if (!ToProto->getDefinition()) {
Importer.ImportDefinition(cast<ObjCProtocolDecl>(FromDC), ToProto,
ASTNodeImporter::IDK_Everything);
return;
}
}
Importer.ImportDeclContext(FromDC, true);
}
}
DeclarationName ASTImporter::Import(DeclarationName FromName) {
if (!FromName)
return DeclarationName();
switch (FromName.getNameKind()) {
case DeclarationName::Identifier:
return Import(FromName.getAsIdentifierInfo());
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
return Import(FromName.getObjCSelector());
case DeclarationName::CXXConstructorName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXConstructorName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXDestructorName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXDestructorName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXDeductionGuideName: {
TemplateDecl *Template = cast_or_null<TemplateDecl>(
Import(FromName.getCXXDeductionGuideTemplate()));
if (!Template)
return DeclarationName();
return ToContext.DeclarationNames.getCXXDeductionGuideName(Template);
}
case DeclarationName::CXXConversionFunctionName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXConversionFunctionName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXOperatorName:
return ToContext.DeclarationNames.getCXXOperatorName(
FromName.getCXXOverloadedOperator());
case DeclarationName::CXXLiteralOperatorName:
return ToContext.DeclarationNames.getCXXLiteralOperatorName(
Import(FromName.getCXXLiteralIdentifier()));
case DeclarationName::CXXUsingDirective:
// FIXME: STATICS!
return DeclarationName::getUsingDirectiveName();
}
llvm_unreachable("Invalid DeclarationName Kind!");
}
IdentifierInfo *ASTImporter::Import(const IdentifierInfo *FromId) {
if (!FromId)
return nullptr;
IdentifierInfo *ToId = &ToContext.Idents.get(FromId->getName());
if (!ToId->getBuiltinID() && FromId->getBuiltinID())
ToId->setBuiltinID(FromId->getBuiltinID());
return ToId;
}
Selector ASTImporter::Import(Selector FromSel) {
if (FromSel.isNull())
return Selector();
SmallVector<IdentifierInfo *, 4> Idents;
Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(0)));
for (unsigned I = 1, N = FromSel.getNumArgs(); I < N; ++I)
Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(I)));
return ToContext.Selectors.getSelector(FromSel.getNumArgs(), Idents.data());
}
DeclarationName ASTImporter::HandleNameConflict(DeclarationName Name,
DeclContext *DC,
unsigned IDNS,
NamedDecl **Decls,
unsigned NumDecls) {
return Name;
}
DiagnosticBuilder ASTImporter::ToDiag(SourceLocation Loc, unsigned DiagID) {
if (LastDiagFromFrom)
ToContext.getDiagnostics().notePriorDiagnosticFrom(
FromContext.getDiagnostics());
LastDiagFromFrom = false;
return ToContext.getDiagnostics().Report(Loc, DiagID);
}
DiagnosticBuilder ASTImporter::FromDiag(SourceLocation Loc, unsigned DiagID) {
if (!LastDiagFromFrom)
FromContext.getDiagnostics().notePriorDiagnosticFrom(
ToContext.getDiagnostics());
LastDiagFromFrom = true;
return FromContext.getDiagnostics().Report(Loc, DiagID);
}
void ASTImporter::CompleteDecl(Decl *D) {
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
if (!ID->getDefinition())
ID->startDefinition();
} else if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D)) {
if (!PD->getDefinition())
PD->startDefinition();
} else if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
if (!TD->getDefinition() && !TD->isBeingDefined()) {
TD->startDefinition();
TD->setCompleteDefinition(true);
}
} else {
assert(0 && "CompleteDecl called on a Decl that can't be completed");
}
}
Decl *ASTImporter::Imported(Decl *From, Decl *To) {
if (From->hasAttrs()) {
for (Attr *FromAttr : From->getAttrs())
To->addAttr(FromAttr->clone(To->getASTContext()));
}
if (From->isUsed()) {
To->setIsUsed();
}
if (From->isImplicit()) {
To->setImplicit();
}
ImportedDecls[From] = To;
return To;
}
bool ASTImporter::IsStructurallyEquivalent(QualType From, QualType To,
bool Complain) {
llvm::DenseMap<const Type *, const Type *>::iterator Pos
= ImportedTypes.find(From.getTypePtr());
if (Pos != ImportedTypes.end() && ToContext.hasSameType(Import(From), To))
return true;
StructuralEquivalenceContext Ctx(FromContext, ToContext, NonEquivalentDecls,
false, Complain);
return Ctx.IsStructurallyEquivalent(From, To);
}
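// Usage sketch (hypothetical names, not part of this commit): clients such as
// LLDB's expression evaluator drive the importer roughly as
//
//   ASTImporter Importer(ToCtx, ToFileMgr, FromCtx, FromFileMgr,
//                        /*MinimalImport=*/false);
//   if (Decl *ToD = Importer.Import(FromDecl))
//     ; // ToD now lives in ToCtx; later imports reuse the caches above.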
Index: head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp (revision 322855)
@@ -1,2566 +1,2592 @@
//===--- DeclCXX.cpp - C++ Declaration AST Node Implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the C++ related Decl classes.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ODRHash.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/IdentifierTable.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// Decl Allocation/Deallocation Method Implementations
//===----------------------------------------------------------------------===//
void AccessSpecDecl::anchor() { }
AccessSpecDecl *AccessSpecDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) AccessSpecDecl(EmptyShell());
}
void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const {
ExternalASTSource *Source = C.getExternalSource();
assert(Impl.Decls.isLazy() && "getFromExternalSource for non-lazy set");
assert(Source && "getFromExternalSource with no external source");
for (ASTUnresolvedSet::iterator I = Impl.begin(); I != Impl.end(); ++I)
I.setDecl(cast<NamedDecl>(Source->GetExternalDecl(
reinterpret_cast<uintptr_t>(I.getDecl()) >> 2)));
Impl.Decls.setLazy(false);
}
CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
: UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0),
Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false),
Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true),
HasPrivateFields(false), HasProtectedFields(false),
HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false),
HasOnlyCMembers(true), HasInClassInitializer(false),
HasUninitializedReferenceMember(false), HasUninitializedFields(false),
HasInheritedConstructor(false), HasInheritedAssignment(false),
+ NeedOverloadResolutionForCopyConstructor(false),
NeedOverloadResolutionForMoveConstructor(false),
NeedOverloadResolutionForMoveAssignment(false),
NeedOverloadResolutionForDestructor(false),
+ DefaultedCopyConstructorIsDeleted(false),
DefaultedMoveConstructorIsDeleted(false),
DefaultedMoveAssignmentIsDeleted(false),
DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All),
DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true),
HasConstexprNonCopyMoveConstructor(false),
HasDefaultedDefaultConstructor(false),
+ CanPassInRegisters(true),
DefaultedDefaultConstructorIsConstexpr(true),
HasConstexprDefaultConstructor(false),
HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false),
UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0),
ImplicitCopyConstructorCanHaveConstParamForVBase(true),
ImplicitCopyConstructorCanHaveConstParamForNonVBase(true),
ImplicitCopyAssignmentHasConstParam(true),
HasDeclaredCopyConstructorWithConstParam(false),
HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false),
IsParsingBaseSpecifiers(false), HasODRHash(false), ODRHash(0),
NumBases(0), NumVBases(0), Bases(), VBases(), Definition(D),
FirstFriend() {}
CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
return Bases.get(Definition->getASTContext().getExternalSource());
}
CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getVBasesSlowCase() const {
return VBases.get(Definition->getASTContext().getExternalSource());
}
CXXRecordDecl::CXXRecordDecl(Kind K, TagKind TK, const ASTContext &C,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
CXXRecordDecl *PrevDecl)
: RecordDecl(K, TK, C, DC, StartLoc, IdLoc, Id, PrevDecl),
DefinitionData(PrevDecl ? PrevDecl->DefinitionData
: nullptr),
TemplateOrInstantiation() {}
CXXRecordDecl *CXXRecordDecl::Create(const ASTContext &C, TagKind TK,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
CXXRecordDecl* PrevDecl,
bool DelayTypeCreation) {
CXXRecordDecl *R = new (C, DC) CXXRecordDecl(CXXRecord, TK, C, DC, StartLoc,
IdLoc, Id, PrevDecl);
R->MayHaveOutOfDateDef = C.getLangOpts().Modules;
// FIXME: DelayTypeCreation seems like such a hack
if (!DelayTypeCreation)
C.getTypeDeclType(R, PrevDecl);
return R;
}
CXXRecordDecl *
CXXRecordDecl::CreateLambda(const ASTContext &C, DeclContext *DC,
TypeSourceInfo *Info, SourceLocation Loc,
bool Dependent, bool IsGeneric,
LambdaCaptureDefault CaptureDefault) {
CXXRecordDecl *R =
new (C, DC) CXXRecordDecl(CXXRecord, TTK_Class, C, DC, Loc, Loc,
nullptr, nullptr);
R->IsBeingDefined = true;
R->DefinitionData =
new (C) struct LambdaDefinitionData(R, Info, Dependent, IsGeneric,
CaptureDefault);
R->MayHaveOutOfDateDef = false;
R->setImplicit(true);
C.getTypeDeclType(R, /*PrevDecl=*/nullptr);
return R;
}
CXXRecordDecl *
CXXRecordDecl::CreateDeserialized(const ASTContext &C, unsigned ID) {
CXXRecordDecl *R = new (C, ID) CXXRecordDecl(
CXXRecord, TTK_Struct, C, nullptr, SourceLocation(), SourceLocation(),
nullptr, nullptr);
R->MayHaveOutOfDateDef = false;
return R;
}
void
CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
unsigned NumBases) {
ASTContext &C = getASTContext();
if (!data().Bases.isOffset() && data().NumBases > 0)
C.Deallocate(data().getBases());
if (NumBases) {
if (!C.getLangOpts().CPlusPlus1z) {
// C++ [dcl.init.aggr]p1:
// An aggregate is [...] a class with [...] no base classes [...].
data().Aggregate = false;
}
// C++ [class]p4:
// A POD-struct is an aggregate class...
data().PlainOldData = false;
}
// The set of seen virtual base types.
llvm::SmallPtrSet<CanQualType, 8> SeenVBaseTypes;
// The virtual bases of this class.
SmallVector<const CXXBaseSpecifier *, 8> VBases;
data().Bases = new(C) CXXBaseSpecifier [NumBases];
data().NumBases = NumBases;
for (unsigned i = 0; i < NumBases; ++i) {
data().getBases()[i] = *Bases[i];
// Keep track of inherited vbases for this base class.
const CXXBaseSpecifier *Base = Bases[i];
QualType BaseType = Base->getType();
// Skip dependent types; we can't do any checking on them now.
if (BaseType->isDependentType())
continue;
CXXRecordDecl *BaseClassDecl
= cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
if (!BaseClassDecl->isEmpty()) {
if (!data().Empty) {
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- either has no non-static data members in the most derived
// class and at most one base class with non-static data members,
// or has no base classes with non-static data members, and
// If this is the second non-empty base, then neither of these two
// clauses can be true.
data().IsStandardLayout = false;
}
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no base class B for which
// is_empty<B>::value is false.
data().Empty = false;
data().HasNoNonEmptyBases = false;
}
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no private or protected base classes
if (Base->getAccessSpecifier() != AS_public)
data().Aggregate = false;
// C++ [class.virtual]p1:
// A class that declares or inherits a virtual function is called a
// polymorphic class.
if (BaseClassDecl->isPolymorphic())
data().Polymorphic = true;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has no non-standard-layout base classes
if (!BaseClassDecl->isStandardLayout())
data().IsStandardLayout = false;
// Record if this base is the first non-literal field or base.
if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C))
data().HasNonLiteralTypeFieldsOrBases = true;
// Now go through all virtual bases of this base and add them.
for (const auto &VBase : BaseClassDecl->vbases()) {
// Add this base if it's not already in the list.
if (SeenVBaseTypes.insert(C.getCanonicalType(VBase.getType())).second) {
VBases.push_back(&VBase);
// C++11 [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each [...] virtual base class B of X
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (CXXRecordDecl *VBaseDecl = VBase.getType()->getAsCXXRecordDecl())
if (!VBaseDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForVBase = false;
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no virtual base classes
data().Aggregate = false;
}
}
if (Base->isVirtual()) {
// Add this base if it's not already in the list.
if (SeenVBaseTypes.insert(C.getCanonicalType(BaseType)).second)
VBases.push_back(Base);
// C++14 [meta.unary.prop] is_empty:
// T is a class type, but not a union type, with ... no virtual base
// classes
data().Empty = false;
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no virtual base classes
data().Aggregate = false;
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor, copy/move constructor, or copy/move assignment
// operator for a class X] is trivial [...] if:
// -- class X has [...] no virtual base classes
data().HasTrivialSpecialMembers &= SMF_Destructor;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has [...] no virtual base classes
data().IsStandardLayout = false;
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor [...]
// -- the class shall not have any virtual base classes
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++1z [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (!BaseClassDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForVBase = false;
} else {
// C++ [class.ctor]p5:
// A default constructor is trivial [...] if:
// -- all the direct base classes of its class have trivial default
// constructors.
if (!BaseClassDecl->hasTrivialDefaultConstructor())
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++0x [class.copy]p13:
// A copy/move constructor for class X is trivial if [...]
// [...]
// -- the constructor selected to copy/move each direct base class
// subobject is trivial, and
if (!BaseClassDecl->hasTrivialCopyConstructor())
data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
// If the base class doesn't have a simple move constructor, we'll eagerly
// declare it and perform overload resolution to determine which function
// it actually calls. If it does have a simple move constructor, this
// check is correct.
if (!BaseClassDecl->hasTrivialMoveConstructor())
data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
// C++0x [class.copy]p27:
// A copy/move assignment operator for class X is trivial if [...]
// [...]
// -- the assignment operator selected to copy/move each direct base
// class subobject is trivial, and
if (!BaseClassDecl->hasTrivialCopyAssignment())
data().HasTrivialSpecialMembers &= ~SMF_CopyAssignment;
// If the base class doesn't have a simple move assignment, we'll eagerly
// declare it and perform overload resolution to determine which function
// it actually calls. If it does have a simple move assignment, this
// check is correct.
if (!BaseClassDecl->hasTrivialMoveAssignment())
data().HasTrivialSpecialMembers &= ~SMF_MoveAssignment;
// C++11 [class.ctor]p6:
// If that user-written default constructor would satisfy the
// requirements of a constexpr constructor, the implicitly-defined
// default constructor is constexpr.
if (!BaseClassDecl->hasConstexprDefaultConstructor())
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++1z [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (!BaseClassDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForNonVBase = false;
}
// C++ [class.ctor]p3:
// A destructor is trivial if all the direct base classes of its class
// have trivial destructors.
if (!BaseClassDecl->hasTrivialDestructor())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
if (!BaseClassDecl->hasIrrelevantDestructor())
data().HasIrrelevantDestructor = false;
// C++11 [class.copy]p18:
// The implicitly-declared copy assignment operator for a class X will
// have the form 'X& X::operator=(const X&)' if each direct base class B
// of X has a copy assignment operator whose parameter is of type 'const
// B&', 'const volatile B&', or 'B' [...]
if (!BaseClassDecl->hasCopyAssignmentWithConstParam())
data().ImplicitCopyAssignmentHasConstParam = false;
// A class has an Objective-C object member if... or any of its bases
// has an Objective-C object member.
if (BaseClassDecl->hasObjectMember())
setHasObjectMember(true);
if (BaseClassDecl->hasVolatileMember())
setHasVolatileMember(true);
// Keep track of the presence of mutable fields.
- if (BaseClassDecl->hasMutableFields())
+ if (BaseClassDecl->hasMutableFields()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
if (BaseClassDecl->hasUninitializedReferenceMember())
data().HasUninitializedReferenceMember = true;
if (!BaseClassDecl->allowConstDefaultInit())
data().HasUninitializedFields = true;
addedClassSubobject(BaseClassDecl);
}
if (VBases.empty()) {
data().IsParsingBaseSpecifiers = false;
return;
}
// Create base specifier for any direct or indirect virtual bases.
data().VBases = new (C) CXXBaseSpecifier[VBases.size()];
data().NumVBases = VBases.size();
for (int I = 0, E = VBases.size(); I != E; ++I) {
QualType Type = VBases[I]->getType();
if (!Type->isDependentType())
addedClassSubobject(Type->getAsCXXRecordDecl());
data().getVBases()[I] = *VBases[I];
}
data().IsParsingBaseSpecifiers = false;
}
unsigned CXXRecordDecl::getODRHash() const {
assert(hasDefinition() && "ODRHash only for records with definitions");
// Previously calculated hash is stored in DefinitionData.
if (DefinitionData->HasODRHash)
return DefinitionData->ODRHash;
// Only calculate hash on first call of getODRHash per record.
ODRHash Hash;
Hash.AddCXXRecordDecl(getDefinition());
DefinitionData->HasODRHash = true;
DefinitionData->ODRHash = Hash.CalculateHash();
return DefinitionData->ODRHash;
}
void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
// C++11 [class.copy]p11:
// A defaulted copy/move constructor for a class X is defined as
// deleted if X has:
// -- a direct or virtual base class B that cannot be copied/moved [...]
// -- a non-static data member of class type M (or array thereof)
// that cannot be copied or moved [...]
+ if (!Subobj->hasSimpleCopyConstructor())
+ data().NeedOverloadResolutionForCopyConstructor = true;
if (!Subobj->hasSimpleMoveConstructor())
data().NeedOverloadResolutionForMoveConstructor = true;
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined as
// deleted if X has:
// -- a direct or virtual base class B that cannot be copied/moved [...]
// -- a non-static data member of class type M (or array thereof)
// that cannot be copied or moved [...]
if (!Subobj->hasSimpleMoveAssignment())
data().NeedOverloadResolutionForMoveAssignment = true;
// C++11 [class.ctor]p5, C++11 [class.copy]p11, C++11 [class.dtor]p5:
// A defaulted [ctor or dtor] for a class X is defined as
// deleted if X has:
// -- any direct or virtual base class [...] has a type with a destructor
// that is deleted or inaccessible from the defaulted [ctor or dtor].
// -- any non-static data member has a type with a destructor
// that is deleted or inaccessible from the defaulted [ctor or dtor].
if (!Subobj->hasSimpleDestructor()) {
+ data().NeedOverloadResolutionForCopyConstructor = true;
data().NeedOverloadResolutionForMoveConstructor = true;
data().NeedOverloadResolutionForDestructor = true;
}
}
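// Illustrative note (example types are hypothetical): the newly added
// copy-constructor flag mirrors the existing move logic. Given
//
//   struct M { M(M&); };  // copy constructor takes a non-const reference
//   struct X { M m; };
//
// X's defaulted copy constructor cannot be assumed to have the usual
// 'const X&' form, so NeedOverloadResolutionForCopyConstructor is set and
// overload resolution must determine what each subobject copy actually calls.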
bool CXXRecordDecl::hasAnyDependentBases() const {
if (!isDependentContext())
return false;
return !forallBases([](const CXXRecordDecl *) { return true; });
}
bool CXXRecordDecl::isTriviallyCopyable() const {
// C++0x [class]p5:
// A trivially copyable class is a class that:
// -- has no non-trivial copy constructors,
if (hasNonTrivialCopyConstructor()) return false;
// -- has no non-trivial move constructors,
if (hasNonTrivialMoveConstructor()) return false;
// -- has no non-trivial copy assignment operators,
if (hasNonTrivialCopyAssignment()) return false;
// -- has no non-trivial move assignment operators, and
if (hasNonTrivialMoveAssignment()) return false;
// -- has a trivial destructor.
if (!hasTrivialDestructor()) return false;
return true;
}
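// Illustrative examples (not part of the upstream file) for the predicate
// above:
//
//   struct A { int x; };        // trivially copyable
//   struct B { B(const B&); };  // not: user-declared copy constructor
//   struct C { ~C() {} };       // not: non-trivial destructor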
void CXXRecordDecl::markedVirtualFunctionPure() {
// C++ [class.abstract]p2:
// A class is abstract if it has at least one pure virtual function.
data().Abstract = true;
}
void CXXRecordDecl::addedMember(Decl *D) {
if (!D->isImplicit() &&
!isa<FieldDecl>(D) &&
!isa<IndirectFieldDecl>(D) &&
(!isa<TagDecl>(D) || cast<TagDecl>(D)->getTagKind() == TTK_Class ||
cast<TagDecl>(D)->getTagKind() == TTK_Interface))
data().HasOnlyCMembers = false;
// Ignore friends and invalid declarations.
if (D->getFriendObjectKind() || D->isInvalidDecl())
return;
FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(D);
if (FunTmpl)
D = FunTmpl->getTemplatedDecl();
// FIXME: Pass NamedDecl* to addedMember?
Decl *DUnderlying = D;
if (auto *ND = dyn_cast<NamedDecl>(DUnderlying)) {
DUnderlying = ND->getUnderlyingDecl();
if (FunctionTemplateDecl *UnderlyingFunTmpl =
dyn_cast<FunctionTemplateDecl>(DUnderlying))
DUnderlying = UnderlyingFunTmpl->getTemplatedDecl();
}
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isVirtual()) {
// C++ [dcl.init.aggr]p1:
// An aggregate is an array or a class with [...] no virtual functions.
data().Aggregate = false;
// C++ [class]p4:
// A POD-struct is an aggregate class...
data().PlainOldData = false;
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no virtual member functions...
data().Empty = false;
// C++ [class.virtual]p1:
// A class that declares or inherits a virtual function is called a
// polymorphic class.
data().Polymorphic = true;
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor, copy/move constructor, or copy/move
// assignment operator for a class X] is trivial [...] if:
// -- class X has no virtual functions [...]
data().HasTrivialSpecialMembers &= SMF_Destructor;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has no virtual functions
data().IsStandardLayout = false;
}
}
// Notify the listener if an implicit member was added after the definition
// was completed.
if (!isBeingDefined() && D->isImplicit())
if (ASTMutationListener *L = getASTMutationListener())
L->AddedCXXImplicitMember(data().Definition, D);
// The kind of special member this declaration is, if any.
unsigned SMKind = 0;
// Handle constructors.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
if (!Constructor->isImplicit()) {
// Note that we have a user-declared constructor.
data().UserDeclaredConstructor = true;
// C++ [class]p4:
// A POD-struct is an aggregate class [...]
// Since the POD bit is meant to be C++03 POD-ness, clear it even if the
// type is technically an aggregate in C++0x since it wouldn't be in 03.
data().PlainOldData = false;
}
if (Constructor->isDefaultConstructor()) {
SMKind |= SMF_DefaultConstructor;
if (Constructor->isUserProvided())
data().UserProvidedDefaultConstructor = true;
if (Constructor->isConstexpr())
data().HasConstexprDefaultConstructor = true;
if (Constructor->isDefaulted())
data().HasDefaultedDefaultConstructor = true;
}
if (!FunTmpl) {
unsigned Quals;
if (Constructor->isCopyConstructor(Quals)) {
SMKind |= SMF_CopyConstructor;
if (Quals & Qualifiers::Const)
data().HasDeclaredCopyConstructorWithConstParam = true;
} else if (Constructor->isMoveConstructor())
SMKind |= SMF_MoveConstructor;
}
// C++11 [dcl.init.aggr]p1: DR1518
// An aggregate is an array or a class with no user-provided, explicit, or
// inherited constructors
if (Constructor->isUserProvided() || Constructor->isExplicit())
data().Aggregate = false;
}
// Handle constructors, including those inherited from base classes.
if (CXXConstructorDecl *Constructor =
dyn_cast<CXXConstructorDecl>(DUnderlying)) {
// Record if we see any constexpr constructors which are neither copy
// nor move constructors.
// C++1z [basic.types]p10:
// [...] has at least one constexpr constructor or constructor template
// (possibly inherited from a base class) that is not a copy or move
// constructor [...]
if (Constructor->isConstexpr() && !Constructor->isCopyOrMoveConstructor())
data().HasConstexprNonCopyMoveConstructor = true;
}
// Handle destructors.
if (CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(D)) {
SMKind |= SMF_Destructor;
if (DD->isUserProvided())
data().HasIrrelevantDestructor = false;
// If the destructor is explicitly defaulted and not trivial or not public
// or if the destructor is deleted, we clear HasIrrelevantDestructor in
// finishedDefaultedOrDeletedMember.
// C++11 [class.dtor]p5:
// A destructor is trivial if [...] the destructor is not virtual.
if (DD->isVirtual())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
}
// Handle member functions.
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isCopyAssignmentOperator()) {
SMKind |= SMF_CopyAssignment;
const ReferenceType *ParamTy =
Method->getParamDecl(0)->getType()->getAs<ReferenceType>();
if (!ParamTy || ParamTy->getPointeeType().isConstQualified())
data().HasDeclaredCopyAssignmentWithConstParam = true;
}
if (Method->isMoveAssignmentOperator())
SMKind |= SMF_MoveAssignment;
// Keep the list of conversion functions up-to-date.
if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(D)) {
// FIXME: We use the 'unsafe' accessor for the access specifier here,
// because Sema may not have set it yet. That's really just a misdesign
// in Sema. However, LLDB *will* have set the access specifier correctly,
// and adds declarations after the class is technically completed,
// so completeDefinition()'s overriding of the access specifiers doesn't
// work.
AccessSpecifier AS = Conversion->getAccessUnsafe();
if (Conversion->getPrimaryTemplate()) {
// We don't record specializations.
} else {
ASTContext &Ctx = getASTContext();
ASTUnresolvedSet &Conversions = data().Conversions.get(Ctx);
NamedDecl *Primary =
FunTmpl ? cast<NamedDecl>(FunTmpl) : cast<NamedDecl>(Conversion);
if (Primary->getPreviousDecl())
Conversions.replace(cast<NamedDecl>(Primary->getPreviousDecl()),
Primary, AS);
else
Conversions.addDecl(Ctx, Primary, AS);
}
}
if (SMKind) {
// If this is the first declaration of a special member, we no longer have
// an implicit trivial special member.
data().HasTrivialSpecialMembers &=
data().DeclaredSpecialMembers | ~SMKind;
if (!Method->isImplicit() && !Method->isUserProvided()) {
// This method is user-declared but not user-provided. We can't work out
// whether it's trivial yet (not until we get to the end of the class).
// We'll handle this method in finishedDefaultedOrDeletedMember.
} else if (Method->isTrivial())
data().HasTrivialSpecialMembers |= SMKind;
else
data().DeclaredNonTrivialSpecialMembers |= SMKind;
// Note when we have declared a special member, and suppress the
// implicit declaration of this special member.
data().DeclaredSpecialMembers |= SMKind;
if (!Method->isImplicit()) {
data().UserDeclaredSpecialMembers |= SMKind;
// C++03 [class]p4:
// A POD-struct is an aggregate class that has [...] no user-defined
// copy assignment operator and no user-defined destructor.
//
// Since the POD bit is meant to be C++03 POD-ness, and in C++03,
// aggregates could not have any constructors, clear it even for an
// explicitly defaulted or deleted constructor.
//
// Also, a user-declared move assignment operator makes a class non-POD.
// This is an extension in C++03.
data().PlainOldData = false;
}
}
return;
}
// Handle non-static data members.
if (FieldDecl *Field = dyn_cast<FieldDecl>(D)) {
// C++ [class.bit]p2:
// A declaration for a bit-field that omits the identifier declares an
// unnamed bit-field. Unnamed bit-fields are not members and cannot be
// initialized.
if (Field->isUnnamedBitfield())
return;
// C++ [dcl.init.aggr]p1:
// An aggregate is an array or a class (clause 9) with [...] no
// private or protected non-static data members (clause 11).
//
// A POD must be an aggregate.
if (D->getAccess() == AS_private || D->getAccess() == AS_protected) {
data().Aggregate = false;
data().PlainOldData = false;
}
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- has the same access control for all non-static data members,
switch (D->getAccess()) {
case AS_private: data().HasPrivateFields = true; break;
case AS_protected: data().HasProtectedFields = true; break;
case AS_public: data().HasPublicFields = true; break;
case AS_none: llvm_unreachable("Invalid access specifier");
}
if ((data().HasPrivateFields + data().HasProtectedFields +
data().HasPublicFields) > 1)
data().IsStandardLayout = false;
// Keep track of the presence of mutable fields.
- if (Field->isMutable())
+ if (Field->isMutable()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
// C++11 [class.union]p8, DR1460:
// If X is a union, a non-static data member of X that is not an anonymous
// union is a variant member of X.
if (isUnion() && !Field->isAnonymousStructOrUnion())
data().HasVariantMembers = true;
// C++0x [class]p9:
// A POD struct is a class that is both a trivial class and a
// standard-layout class, and has no non-static data members of type
// non-POD struct, non-POD union (or array of such types).
//
// Automatic Reference Counting: the presence of a member of Objective-C pointer type
// that does not explicitly have no lifetime makes the class a non-POD.
ASTContext &Context = getASTContext();
QualType T = Context.getBaseElementType(Field->getType());
if (T->isObjCRetainableType() || T.isObjCGCStrong()) {
if (T.hasNonTrivialObjCLifetime()) {
// Objective-C Automatic Reference Counting:
// If a class has a non-static data member of Objective-C pointer
// type (or array thereof), it is a non-POD type and its
// default constructor (if any), copy constructor, move constructor,
// copy assignment operator, move assignment operator, and destructor are
// non-trivial.
setHasObjectMember(true);
struct DefinitionData &Data = data();
Data.PlainOldData = false;
Data.HasTrivialSpecialMembers = 0;
Data.HasIrrelevantDestructor = false;
} else if (!Context.getLangOpts().ObjCAutoRefCount) {
setHasObjectMember(true);
}
} else if (!T.isCXX98PODType(Context))
data().PlainOldData = false;
if (T->isReferenceType()) {
if (!Field->hasInClassInitializer())
data().HasUninitializedReferenceMember = true;
// C++0x [class]p7:
// A standard-layout class is a class that:
// -- has no non-static data members of type [...] reference,
data().IsStandardLayout = false;
+
+ // C++1z [class.copy.ctor]p10:
+ // A defaulted copy constructor for a class X is defined as deleted if X has:
+ // -- a non-static data member of rvalue reference type
+ if (T->isRValueReferenceType())
+ data().DefaultedCopyConstructorIsDeleted = true;
}
if (!Field->hasInClassInitializer() && !Field->isMutable()) {
if (CXXRecordDecl *FieldType = T->getAsCXXRecordDecl()) {
if (FieldType->hasDefinition() && !FieldType->allowConstDefaultInit())
data().HasUninitializedFields = true;
} else {
data().HasUninitializedFields = true;
}
}
// Record if this field is the first non-literal or volatile field or base.
if (!T->isLiteralType(Context) || T.isVolatileQualified())
data().HasNonLiteralTypeFieldsOrBases = true;
if (Field->hasInClassInitializer() ||
(Field->isAnonymousStructOrUnion() &&
Field->getType()->getAsCXXRecordDecl()->hasInClassInitializer())) {
data().HasInClassInitializer = true;
// C++11 [class]p5:
// A default constructor is trivial if [...] no non-static data member
// of its class has a brace-or-equal-initializer.
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++11 [dcl.init.aggr]p1:
// An aggregate is a [...] class with [...] no
// brace-or-equal-initializers for non-static data members.
//
// This rule was removed in C++14.
if (!getASTContext().getLangOpts().CPlusPlus14)
data().Aggregate = false;
// C++11 [class]p10:
// A POD struct is [...] a trivial class.
data().PlainOldData = false;
}
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined
// as deleted if X has:
// -- a non-static data member of reference type
if (T->isReferenceType())
data().DefaultedMoveAssignmentIsDeleted = true;
if (const RecordType *RecordTy = T->getAs<RecordType>()) {
CXXRecordDecl* FieldRec = cast<CXXRecordDecl>(RecordTy->getDecl());
if (FieldRec->getDefinition()) {
addedClassSubobject(FieldRec);
// We may need to perform overload resolution to determine whether a
// field can be moved if it's const or volatile qualified.
if (T.getCVRQualifiers() & (Qualifiers::Const | Qualifiers::Volatile)) {
+ // We need to care about 'const' for the copy constructor because an
+ // implicit copy constructor might be declared with a non-const
+ // parameter.
+ data().NeedOverloadResolutionForCopyConstructor = true;
data().NeedOverloadResolutionForMoveConstructor = true;
data().NeedOverloadResolutionForMoveAssignment = true;
}
// C++11 [class.ctor]p5, C++11 [class.copy]p11:
// A defaulted [special member] for a class X is defined as
// deleted if:
// -- X is a union-like class that has a variant member with a
// non-trivial [corresponding special member]
if (isUnion()) {
+ if (FieldRec->hasNonTrivialCopyConstructor())
+ data().DefaultedCopyConstructorIsDeleted = true;
if (FieldRec->hasNonTrivialMoveConstructor())
data().DefaultedMoveConstructorIsDeleted = true;
if (FieldRec->hasNonTrivialMoveAssignment())
data().DefaultedMoveAssignmentIsDeleted = true;
if (FieldRec->hasNonTrivialDestructor())
data().DefaultedDestructorIsDeleted = true;
}
// For an anonymous union member, our overload resolution will perform
// overload resolution for its members.
if (Field->isAnonymousStructOrUnion()) {
+ data().NeedOverloadResolutionForCopyConstructor |=
+ FieldRec->data().NeedOverloadResolutionForCopyConstructor;
data().NeedOverloadResolutionForMoveConstructor |=
FieldRec->data().NeedOverloadResolutionForMoveConstructor;
data().NeedOverloadResolutionForMoveAssignment |=
FieldRec->data().NeedOverloadResolutionForMoveAssignment;
data().NeedOverloadResolutionForDestructor |=
FieldRec->data().NeedOverloadResolutionForDestructor;
}
// C++0x [class.ctor]p5:
// A default constructor is trivial [...] if:
// -- for all the non-static data members of its class that are of
// class type (or array thereof), each such class has a trivial
// default constructor.
if (!FieldRec->hasTrivialDefaultConstructor())
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++0x [class.copy]p13:
// A copy/move constructor for class X is trivial if [...]
// [...]
// -- for each non-static data member of X that is of class type (or
// an array thereof), the constructor selected to copy/move that
// member is trivial;
if (!FieldRec->hasTrivialCopyConstructor())
data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
// If the field doesn't have a simple move constructor, we'll eagerly
// declare the move constructor for this class and we'll decide whether
// it's trivial then.
if (!FieldRec->hasTrivialMoveConstructor())
data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
// C++0x [class.copy]p27:
// A copy/move assignment operator for class X is trivial if [...]
// [...]
// -- for each non-static data member of X that is of class type (or
// an array thereof), the assignment operator selected to
// copy/move that member is trivial;
if (!FieldRec->hasTrivialCopyAssignment())
data().HasTrivialSpecialMembers &= ~SMF_CopyAssignment;
// If the field doesn't have a simple move assignment, we'll eagerly
// declare the move assignment for this class and we'll decide whether
// it's trivial then.
if (!FieldRec->hasTrivialMoveAssignment())
data().HasTrivialSpecialMembers &= ~SMF_MoveAssignment;
if (!FieldRec->hasTrivialDestructor())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
if (!FieldRec->hasIrrelevantDestructor())
data().HasIrrelevantDestructor = false;
if (FieldRec->hasObjectMember())
setHasObjectMember(true);
if (FieldRec->hasVolatileMember())
setHasVolatileMember(true);
// C++0x [class]p7:
// A standard-layout class is a class that:
// -- has no non-static data members of type non-standard-layout
// class (or array of such types) [...]
if (!FieldRec->isStandardLayout())
data().IsStandardLayout = false;
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- has no base classes of the same type as the first non-static
// data member.
// We don't want to expend bits in the state of the record decl
// tracking whether this is the first non-static data member so we
// cheat a bit and use some of the existing state: the empty bit.
// Virtual bases and virtual methods make a class non-empty, but they
// also make it non-standard-layout so we needn't check here.
// A non-empty base class may leave the class standard-layout, but not
// if we have arrived here, and have at least one non-static data
// member. If IsStandardLayout remains true, then the first non-static
// data member must come through here with Empty still true, and Empty
// will subsequently be set to false below.
if (data().IsStandardLayout && data().Empty) {
for (const auto &BI : bases()) {
if (Context.hasSameUnqualifiedType(BI.getType(), T)) {
data().IsStandardLayout = false;
break;
}
}
}
// Keep track of the presence of mutable fields.
- if (FieldRec->hasMutableFields())
+ if (FieldRec->hasMutableFields()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
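// Illustrative sketch (an assumption for exposition): a mutable member
// is copied as non-const even from a const source object, e.g.
//   struct M { mutable int x; };
//   struct S { M m; };
// so the constructor selected to copy 'm' may differ from the one a
// plain const member would pick; hence the flag set above.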
// C++11 [class.copy]p13:
// If the implicitly-defined constructor would satisfy the
// requirements of a constexpr constructor, the implicitly-defined
// constructor is constexpr.
// C++11 [dcl.constexpr]p4:
// -- every constructor involved in initializing non-static data
// members [...] shall be a constexpr constructor
if (!Field->hasInClassInitializer() &&
!FieldRec->hasConstexprDefaultConstructor() && !isUnion())
// The standard requires any in-class initializer to be a constant
// expression. We consider this to be a defect.
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++11 [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// of a class type M (or array thereof) has a copy constructor whose
// first parameter is of type 'const M&' or 'const volatile M&'.
if (!FieldRec->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForNonVBase = false;
// C++11 [class.copy]p18:
// The implicitly-declared copy assignment operator for a class X will
// have the form 'X& X::operator=(const X&)' if [...] for all the
// non-static data members of X that are of a class type M (or array
// thereof), each such class type has a copy assignment operator whose
// parameter is of type 'const M&', 'const volatile M&' or 'M'.
if (!FieldRec->hasCopyAssignmentWithConstParam())
data().ImplicitCopyAssignmentHasConstParam = false;
if (FieldRec->hasUninitializedReferenceMember() &&
!Field->hasInClassInitializer())
data().HasUninitializedReferenceMember = true;
// C++11 [class.union]p8, DR1460:
// a non-static data member of an anonymous union that is a member of
// X is also a variant member of X.
if (FieldRec->hasVariantMembers() &&
Field->isAnonymousStructOrUnion())
data().HasVariantMembers = true;
}
} else {
// Base element type of field is a non-class type.
if (!T->isLiteralType(Context) ||
(!Field->hasInClassInitializer() && !isUnion()))
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined
// as deleted if X has:
// -- a non-static data member of const non-class type (or array
// thereof)
if (T.isConstQualified())
data().DefaultedMoveAssignmentIsDeleted = true;
}
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- either has no non-static data members in the most derived
// class and at most one base class with non-static data members,
// or has no base classes with non-static data members, and
// At this point we know that we have a non-static data member, so the last
// clause holds.
if (!data().HasNoNonEmptyBases)
data().IsStandardLayout = false;
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no non-static data members other
// than bit-fields of length 0...
if (data().Empty) {
if (!Field->isBitField() ||
(!Field->getBitWidth()->isTypeDependent() &&
!Field->getBitWidth()->isValueDependent() &&
Field->getBitWidthValue(Context) != 0))
data().Empty = false;
}
}
// Handle using declarations of conversion functions.
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(D)) {
if (Shadow->getDeclName().getNameKind()
== DeclarationName::CXXConversionFunctionName) {
ASTContext &Ctx = getASTContext();
data().Conversions.get(Ctx).addDecl(Ctx, Shadow, Shadow->getAccess());
}
}
if (UsingDecl *Using = dyn_cast<UsingDecl>(D)) {
if (Using->getDeclName().getNameKind() ==
DeclarationName::CXXConstructorName) {
data().HasInheritedConstructor = true;
// C++1z [dcl.init.aggr]p1:
// An aggregate is [...] a class [...] with no inherited constructors
data().Aggregate = false;
}
if (Using->getDeclName().getCXXOverloadedOperator() == OO_Equal)
data().HasInheritedAssignment = true;
}
}
void CXXRecordDecl::finishedDefaultedOrDeletedMember(CXXMethodDecl *D) {
assert(!D->isImplicit() && !D->isUserProvided());
// The kind of special member this declaration is, if any.
unsigned SMKind = 0;
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
if (Constructor->isDefaultConstructor()) {
SMKind |= SMF_DefaultConstructor;
if (Constructor->isConstexpr())
data().HasConstexprDefaultConstructor = true;
}
if (Constructor->isCopyConstructor())
SMKind |= SMF_CopyConstructor;
else if (Constructor->isMoveConstructor())
SMKind |= SMF_MoveConstructor;
else if (Constructor->isConstexpr())
// We may now know that the constructor is constexpr.
data().HasConstexprNonCopyMoveConstructor = true;
} else if (isa<CXXDestructorDecl>(D)) {
SMKind |= SMF_Destructor;
if (!D->isTrivial() || D->getAccess() != AS_public || D->isDeleted())
data().HasIrrelevantDestructor = false;
} else if (D->isCopyAssignmentOperator())
SMKind |= SMF_CopyAssignment;
else if (D->isMoveAssignmentOperator())
SMKind |= SMF_MoveAssignment;
// Update which trivial / non-trivial special members we have.
// addedMember will have skipped this step for this member.
if (D->isTrivial())
data().HasTrivialSpecialMembers |= SMKind;
else
data().DeclaredNonTrivialSpecialMembers |= SMKind;
}
bool CXXRecordDecl::isCLike() const {
if (getTagKind() == TTK_Class || getTagKind() == TTK_Interface ||
!TemplateOrInstantiation.isNull())
return false;
if (!hasDefinition())
return true;
return isPOD() && data().HasOnlyCMembers;
}
bool CXXRecordDecl::isGenericLambda() const {
if (!isLambda()) return false;
return getLambdaData().IsGenericLambda;
}
CXXMethodDecl* CXXRecordDecl::getLambdaCallOperator() const {
if (!isLambda()) return nullptr;
DeclarationName Name =
getASTContext().DeclarationNames.getCXXOperatorName(OO_Call);
DeclContext::lookup_result Calls = lookup(Name);
assert(!Calls.empty() && "Missing lambda call operator!");
assert(Calls.size() == 1 && "More than one lambda call operator!");
NamedDecl *CallOp = Calls.front();
if (FunctionTemplateDecl *CallOpTmpl =
dyn_cast<FunctionTemplateDecl>(CallOp))
return cast<CXXMethodDecl>(CallOpTmpl->getTemplatedDecl());
return cast<CXXMethodDecl>(CallOp);
}
CXXMethodDecl* CXXRecordDecl::getLambdaStaticInvoker() const {
if (!isLambda()) return nullptr;
DeclarationName Name =
&getASTContext().Idents.get(getLambdaStaticInvokerName());
DeclContext::lookup_result Invoker = lookup(Name);
if (Invoker.empty()) return nullptr;
assert(Invoker.size() == 1 && "More than one static invoker operator!");
NamedDecl *InvokerFun = Invoker.front();
if (FunctionTemplateDecl *InvokerTemplate =
dyn_cast<FunctionTemplateDecl>(InvokerFun))
return cast<CXXMethodDecl>(InvokerTemplate->getTemplatedDecl());
return cast<CXXMethodDecl>(InvokerFun);
}
void CXXRecordDecl::getCaptureFields(
llvm::DenseMap<const VarDecl *, FieldDecl *> &Captures,
FieldDecl *&ThisCapture) const {
Captures.clear();
ThisCapture = nullptr;
LambdaDefinitionData &Lambda = getLambdaData();
RecordDecl::field_iterator Field = field_begin();
for (const LambdaCapture *C = Lambda.Captures, *CEnd = C + Lambda.NumCaptures;
C != CEnd; ++C, ++Field) {
if (C->capturesThis())
ThisCapture = *Field;
else if (C->capturesVariable())
Captures[C->getCapturedVar()] = *Field;
}
assert(Field == field_end());
}
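// Illustrative use (a sketch; 'Closure' is a hypothetical CXXRecordDecl*
// for a lambda):
//   llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
//   FieldDecl *ThisCapture = nullptr;
//   Closure->getCaptureFields(Captures, ThisCapture);
// Afterwards, Captures maps each captured variable to the closure-type
// field that stores it, and ThisCapture names the field holding a
// captured 'this', if any.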
TemplateParameterList *
CXXRecordDecl::getGenericLambdaTemplateParameterList() const {
if (!isLambda()) return nullptr;
CXXMethodDecl *CallOp = getLambdaCallOperator();
if (FunctionTemplateDecl *Tmpl = CallOp->getDescribedFunctionTemplate())
return Tmpl->getTemplateParameters();
return nullptr;
}
Decl *CXXRecordDecl::getLambdaContextDecl() const {
assert(isLambda() && "Not a lambda closure type!");
ExternalASTSource *Source = getParentASTContext().getExternalSource();
return getLambdaData().ContextDecl.get(Source);
}
static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
QualType T =
cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
->getConversionType();
return Context.getCanonicalType(T);
}
/// Collect the visible conversions of a base class.
///
/// \param Record a base class of the class we're considering
/// \param InVirtual whether this base class is a virtual base (or a base
/// of a virtual base)
/// \param Access the access along the inheritance path to this base
/// \param ParentHiddenTypes the conversions provided by the inheritors
/// of this base
/// \param Output the set to which to add conversions from non-virtual bases
/// \param VOutput the set to which to add conversions from virtual bases
/// \param HiddenVBaseCs the set of conversions which were hidden in a
/// virtual base along some inheritance path
static void CollectVisibleConversions(ASTContext &Context,
CXXRecordDecl *Record,
bool InVirtual,
AccessSpecifier Access,
const llvm::SmallPtrSet<CanQualType, 8> &ParentHiddenTypes,
ASTUnresolvedSet &Output,
UnresolvedSetImpl &VOutput,
llvm::SmallPtrSet<NamedDecl*, 8> &HiddenVBaseCs) {
// The set of types which have conversions in this class or its
// subclasses. As an optimization, we don't copy the derived set
// unless it might change.
const llvm::SmallPtrSet<CanQualType, 8> *HiddenTypes = &ParentHiddenTypes;
llvm::SmallPtrSet<CanQualType, 8> HiddenTypesBuffer;
// Collect the direct conversions and figure out which conversions
// will be hidden in the subclasses.
CXXRecordDecl::conversion_iterator ConvI = Record->conversion_begin();
CXXRecordDecl::conversion_iterator ConvE = Record->conversion_end();
if (ConvI != ConvE) {
HiddenTypesBuffer = ParentHiddenTypes;
HiddenTypes = &HiddenTypesBuffer;
for (CXXRecordDecl::conversion_iterator I = ConvI; I != ConvE; ++I) {
CanQualType ConvType(GetConversionType(Context, I.getDecl()));
bool Hidden = ParentHiddenTypes.count(ConvType);
if (!Hidden)
HiddenTypesBuffer.insert(ConvType);
// If this conversion is hidden and we're in a virtual base,
// remember that it's hidden along some inheritance path.
if (Hidden && InVirtual)
HiddenVBaseCs.insert(cast<NamedDecl>(I.getDecl()->getCanonicalDecl()));
// If this conversion isn't hidden, add it to the appropriate output.
else if (!Hidden) {
AccessSpecifier IAccess
= CXXRecordDecl::MergeAccess(Access, I.getAccess());
if (InVirtual)
VOutput.addDecl(I.getDecl(), IAccess);
else
Output.addDecl(Context, I.getDecl(), IAccess);
}
}
}
// Collect information recursively from any base classes.
for (const auto &I : Record->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT) continue;
AccessSpecifier BaseAccess
= CXXRecordDecl::MergeAccess(Access, I.getAccessSpecifier());
bool BaseInVirtual = InVirtual || I.isVirtual();
CXXRecordDecl *Base = cast<CXXRecordDecl>(RT->getDecl());
CollectVisibleConversions(Context, Base, BaseInVirtual, BaseAccess,
*HiddenTypes, Output, VOutput, HiddenVBaseCs);
}
}
/// Collect the visible conversions of a class.
///
/// This would be extremely straightforward if it weren't for virtual
/// bases. It might be worth special-casing that, really.
static void CollectVisibleConversions(ASTContext &Context,
CXXRecordDecl *Record,
ASTUnresolvedSet &Output) {
// The collection of all conversions in virtual bases that we've
// found. These will be added to the output as long as they don't
// appear in the hidden-conversions set.
UnresolvedSet<8> VBaseCs;
// The set of conversions in virtual bases that we've determined to
// be hidden.
llvm::SmallPtrSet<NamedDecl*, 8> HiddenVBaseCs;
// The set of types hidden by classes derived from this one.
llvm::SmallPtrSet<CanQualType, 8> HiddenTypes;
// Go ahead and collect the direct conversions and add them to the
// hidden-types set.
CXXRecordDecl::conversion_iterator ConvI = Record->conversion_begin();
CXXRecordDecl::conversion_iterator ConvE = Record->conversion_end();
Output.append(Context, ConvI, ConvE);
for (; ConvI != ConvE; ++ConvI)
HiddenTypes.insert(GetConversionType(Context, ConvI.getDecl()));
// Recursively collect conversions from base classes.
for (const auto &I : Record->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT) continue;
CollectVisibleConversions(Context, cast<CXXRecordDecl>(RT->getDecl()),
I.isVirtual(), I.getAccessSpecifier(),
HiddenTypes, Output, VBaseCs, HiddenVBaseCs);
}
// Add any unhidden conversions provided by virtual bases.
for (UnresolvedSetIterator I = VBaseCs.begin(), E = VBaseCs.end();
I != E; ++I) {
if (!HiddenVBaseCs.count(cast<NamedDecl>(I.getDecl()->getCanonicalDecl())))
Output.addDecl(Context, I.getDecl(), I.getAccess());
}
}
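// Illustrative example (not part of this source):
//   struct B { operator int(); };
//   struct D : B { operator int(); };
// D::operator int hides B::operator int, so only the derived conversion
// ends up in the visible set; the HiddenTypes bookkeeping above applies
// the same rule across (virtual) bases.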
/// getVisibleConversionFunctions - get all conversion functions visible
/// in the current class, including conversion function templates.
llvm::iterator_range<CXXRecordDecl::conversion_iterator>
CXXRecordDecl::getVisibleConversionFunctions() {
ASTContext &Ctx = getASTContext();
ASTUnresolvedSet *Set;
if (bases_begin() == bases_end()) {
// If root class, all conversions are visible.
Set = &data().Conversions.get(Ctx);
} else {
Set = &data().VisibleConversions.get(Ctx);
// If visible conversion list is not evaluated, evaluate it.
if (!data().ComputedVisibleConversions) {
CollectVisibleConversions(Ctx, this, *Set);
data().ComputedVisibleConversions = true;
}
}
return llvm::make_range(Set->begin(), Set->end());
}
void CXXRecordDecl::removeConversion(const NamedDecl *ConvDecl) {
// This operation is O(N) but extremely rare. Sema only uses it to
// remove UsingShadowDecls in a class that were followed by a direct
// declaration, e.g.:
// class A : B {
// using B::operator int;
// operator int();
// };
// This is uncommon by itself and even more uncommon in conjunction
// with sufficiently large numbers of directly-declared conversions
// that asymptotic behavior matters.
ASTUnresolvedSet &Convs = data().Conversions.get(getASTContext());
for (unsigned I = 0, E = Convs.size(); I != E; ++I) {
if (Convs[I].getDecl() == ConvDecl) {
Convs.erase(I);
assert(std::find(Convs.begin(), Convs.end(), ConvDecl) == Convs.end()
&& "conversion was found multiple times in unresolved set");
return;
}
}
llvm_unreachable("conversion not found in set!");
}
CXXRecordDecl *CXXRecordDecl::getInstantiatedFromMemberClass() const {
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo())
return cast<CXXRecordDecl>(MSInfo->getInstantiatedFrom());
return nullptr;
}
MemberSpecializationInfo *CXXRecordDecl::getMemberSpecializationInfo() const {
return TemplateOrInstantiation.dyn_cast<MemberSpecializationInfo *>();
}
void
CXXRecordDecl::setInstantiationOfMemberClass(CXXRecordDecl *RD,
TemplateSpecializationKind TSK) {
assert(TemplateOrInstantiation.isNull() &&
"Previous template or instantiation?");
assert(!isa<ClassTemplatePartialSpecializationDecl>(this));
TemplateOrInstantiation
= new (getASTContext()) MemberSpecializationInfo(RD, TSK);
}
ClassTemplateDecl *CXXRecordDecl::getDescribedClassTemplate() const {
return TemplateOrInstantiation.dyn_cast<ClassTemplateDecl *>();
}
void CXXRecordDecl::setDescribedClassTemplate(ClassTemplateDecl *Template) {
TemplateOrInstantiation = Template;
}
TemplateSpecializationKind CXXRecordDecl::getTemplateSpecializationKind() const {
if (const ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(this))
return Spec->getSpecializationKind();
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo())
return MSInfo->getTemplateSpecializationKind();
return TSK_Undeclared;
}
void
CXXRecordDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK) {
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(this)) {
Spec->setSpecializationKind(TSK);
return;
}
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) {
MSInfo->setTemplateSpecializationKind(TSK);
return;
}
llvm_unreachable("Not a class template or member class specialization");
}
const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const {
auto GetDefinitionOrSelf =
[](const CXXRecordDecl *D) -> const CXXRecordDecl * {
if (auto *Def = D->getDefinition())
return Def;
return D;
};
// If it's a class template specialization, find the template or partial
// specialization from which it was instantiated.
if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(this)) {
auto From = TD->getInstantiatedFrom();
if (auto *CTD = From.dyn_cast<ClassTemplateDecl *>()) {
while (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) {
if (NewCTD->isMemberSpecialization())
break;
CTD = NewCTD;
}
return GetDefinitionOrSelf(CTD->getTemplatedDecl());
}
if (auto *CTPSD =
From.dyn_cast<ClassTemplatePartialSpecializationDecl *>()) {
while (auto *NewCTPSD = CTPSD->getInstantiatedFromMember()) {
if (NewCTPSD->isMemberSpecialization())
break;
CTPSD = NewCTPSD;
}
return GetDefinitionOrSelf(CTPSD);
}
}
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) {
if (isTemplateInstantiation(MSInfo->getTemplateSpecializationKind())) {
const CXXRecordDecl *RD = this;
while (auto *NewRD = RD->getInstantiatedFromMemberClass())
RD = NewRD;
return GetDefinitionOrSelf(RD);
}
}
assert(!isTemplateInstantiation(this->getTemplateSpecializationKind()) &&
"couldn't find pattern for class template instantiation");
return nullptr;
}
CXXDestructorDecl *CXXRecordDecl::getDestructor() const {
ASTContext &Context = getASTContext();
QualType ClassType = Context.getTypeDeclType(this);
DeclarationName Name
= Context.DeclarationNames.getCXXDestructorName(
Context.getCanonicalType(ClassType));
DeclContext::lookup_result R = lookup(Name);
return R.empty() ? nullptr : dyn_cast<CXXDestructorDecl>(R.front());
}
bool CXXRecordDecl::isAnyDestructorNoReturn() const {
// Destructor is noreturn.
if (const CXXDestructorDecl *Destructor = getDestructor())
if (Destructor->isNoReturn())
return true;
// Check base class destructors for noreturn.
for (const auto &Base : bases())
if (const CXXRecordDecl *RD = Base.getType()->getAsCXXRecordDecl())
if (RD->isAnyDestructorNoReturn())
return true;
// Check fields for noreturn.
for (const auto *Field : fields())
if (const CXXRecordDecl *RD =
Field->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl())
if (RD->isAnyDestructorNoReturn())
return true;
// No destructor is noreturn.
return false;
}
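// Illustrative example (not part of this source):
//   struct A { [[noreturn]] ~A(); };
//   struct B { A a; };
// isAnyDestructorNoReturn() is true for B: its own destructor is not
// noreturn, but the field walk above finds A's.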
void CXXRecordDecl::completeDefinition() {
completeDefinition(nullptr);
}
void CXXRecordDecl::completeDefinition(CXXFinalOverriderMap *FinalOverriders) {
RecordDecl::completeDefinition();
-
+
// If the class may be abstract (but hasn't been marked as such), check for
// any pure final overriders.
if (mayBeAbstract()) {
CXXFinalOverriderMap MyFinalOverriders;
if (!FinalOverriders) {
getFinalOverriders(MyFinalOverriders);
FinalOverriders = &MyFinalOverriders;
}
bool Done = false;
for (CXXFinalOverriderMap::iterator M = FinalOverriders->begin(),
MEnd = FinalOverriders->end();
M != MEnd && !Done; ++M) {
for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd && !Done; ++SO) {
assert(SO->second.size() > 0 &&
"All virtual functions have overridding virtual functions");
// C++ [class.abstract]p4:
// A class is abstract if it contains or inherits at least one
// pure virtual function for which the final overrider is pure
// virtual.
if (SO->second.front().Method->isPure()) {
data().Abstract = true;
Done = true;
break;
}
}
}
}
// Set access bits correctly on the directly-declared conversions.
for (conversion_iterator I = conversion_begin(), E = conversion_end();
I != E; ++I)
I.setAccess((*I)->getAccess());
}
bool CXXRecordDecl::mayBeAbstract() const {
if (data().Abstract || isInvalidDecl() || !data().Polymorphic ||
isDependentContext())
return false;
for (const auto &B : bases()) {
CXXRecordDecl *BaseDecl
= cast<CXXRecordDecl>(B.getType()->getAs<RecordType>()->getDecl());
if (BaseDecl->isAbstract())
return true;
}
return false;
}
void CXXDeductionGuideDecl::anchor() { }
CXXDeductionGuideDecl *CXXDeductionGuideDecl::Create(
ASTContext &C, DeclContext *DC, SourceLocation StartLoc, bool IsExplicit,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation) {
return new (C, DC) CXXDeductionGuideDecl(C, DC, StartLoc, IsExplicit,
NameInfo, T, TInfo, EndLocation);
}
CXXDeductionGuideDecl *CXXDeductionGuideDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) CXXDeductionGuideDecl(C, nullptr, SourceLocation(), false,
DeclarationNameInfo(), QualType(),
nullptr, SourceLocation());
}
void CXXMethodDecl::anchor() { }
bool CXXMethodDecl::isStatic() const {
const CXXMethodDecl *MD = getCanonicalDecl();
if (MD->getStorageClass() == SC_Static)
return true;
OverloadedOperatorKind OOK = getDeclName().getCXXOverloadedOperator();
return isStaticOverloadedOperator(OOK);
}
static bool recursivelyOverrides(const CXXMethodDecl *DerivedMD,
const CXXMethodDecl *BaseMD) {
for (CXXMethodDecl::method_iterator I = DerivedMD->begin_overridden_methods(),
E = DerivedMD->end_overridden_methods(); I != E; ++I) {
const CXXMethodDecl *MD = *I;
if (MD->getCanonicalDecl() == BaseMD->getCanonicalDecl())
return true;
if (recursivelyOverrides(MD, BaseMD))
return true;
}
return false;
}
CXXMethodDecl *
CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase) {
if (this->getParent()->getCanonicalDecl() == RD->getCanonicalDecl())
return this;
// Lookup doesn't work for destructors, so handle them separately.
if (isa<CXXDestructorDecl>(this)) {
CXXMethodDecl *MD = RD->getDestructor();
if (MD) {
if (recursivelyOverrides(MD, this))
return MD;
if (MayBeBase && recursivelyOverrides(this, MD))
return MD;
}
return nullptr;
}
for (auto *ND : RD->lookup(getDeclName())) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND);
if (!MD)
continue;
if (recursivelyOverrides(MD, this))
return MD;
if (MayBeBase && recursivelyOverrides(this, MD))
return MD;
}
for (const auto &I : RD->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT)
continue;
const CXXRecordDecl *Base = cast<CXXRecordDecl>(RT->getDecl());
CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base);
if (T)
return T;
}
return nullptr;
}
CXXMethodDecl *
CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC, bool isInline,
bool isConstexpr, SourceLocation EndLocation) {
return new (C, RD) CXXMethodDecl(CXXMethod, C, RD, StartLoc, NameInfo,
T, TInfo, SC, isInline, isConstexpr,
EndLocation);
}
CXXMethodDecl *CXXMethodDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXMethodDecl(CXXMethod, C, nullptr, SourceLocation(),
DeclarationNameInfo(), QualType(), nullptr,
SC_None, false, false, SourceLocation());
}
CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
bool IsAppleKext) {
assert(isVirtual() && "this method is expected to be virtual");
// When building with -fapple-kext, all calls must go through the vtable since
// the kernel linker can do runtime patching of vtables.
if (IsAppleKext)
return nullptr;
// If the member function is marked 'final', we know that it can't be
// overridden and can therefore devirtualize it unless it's pure virtual.
if (hasAttr<FinalAttr>())
return isPure() ? nullptr : this;
// If Base is unknown, we cannot devirtualize.
if (!Base)
return nullptr;
// If the base expression (after skipping derived-to-base conversions) is a
// class prvalue, then we can devirtualize.
Base = Base->getBestDynamicClassTypeExpr();
if (Base->isRValue() && Base->getType()->isRecordType())
return this;
// If we don't even know what we would call, we can't devirtualize.
const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
if (!BestDynamicDecl)
return nullptr;
// There may be a method corresponding to MD in a derived class.
CXXMethodDecl *DevirtualizedMethod =
getCorrespondingMethodInClass(BestDynamicDecl);
// If that method is pure virtual, we can't devirtualize. If this code is
// reached, the result would be UB, not a direct call to the derived class
// function, and we can't assume the derived class function is defined.
if (DevirtualizedMethod->isPure())
return nullptr;
// If that method is marked final, we can devirtualize it.
if (DevirtualizedMethod->hasAttr<FinalAttr>())
return DevirtualizedMethod;
// Similarly, if the class itself is marked 'final' it can't be overridden
// and we can therefore devirtualize the member function call.
if (BestDynamicDecl->hasAttr<FinalAttr>())
return DevirtualizedMethod;
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
if (VD->getType()->isRecordType())
// The variable has record (non-pointer) type, so we know its dynamic
// type exactly and can devirtualize the call.
return DevirtualizedMethod;
return nullptr;
}
// We can devirtualize calls on an object accessed by a class member access
// expression, since by C++11 [basic.life]p6 we know that it can't refer to
// a derived class object constructed in the same location.
if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
return VD->getType()->isRecordType() ? DevirtualizedMethod : nullptr;
// Likewise for calls on an object accessed by a (non-reference) pointer to
// member access.
if (auto *BO = dyn_cast<BinaryOperator>(Base)) {
if (BO->isPtrMemOp()) {
auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>();
if (MPT->getPointeeType()->isRecordType())
return DevirtualizedMethod;
}
}
// We can't devirtualize the call.
return nullptr;
}
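// Illustrative devirtualization (not part of this source):
//   struct Base { virtual void f(); };
//   struct Derived final : Base { void f() override; };
//   void g(Derived &d) { d.f(); }
// Here BestDynamicDecl is Derived, which carries FinalAttr, so the call
// can be emitted directly instead of through the vtable.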
bool CXXMethodDecl::isUsualDeallocationFunction() const {
if (getOverloadedOperator() != OO_Delete &&
getOverloadedOperator() != OO_Array_Delete)
return false;
// C++ [basic.stc.dynamic.deallocation]p2:
// A template instance is never a usual deallocation function,
// regardless of its signature.
if (getPrimaryTemplate())
return false;
// C++ [basic.stc.dynamic.deallocation]p2:
// If a class T has a member deallocation function named operator delete
// with exactly one parameter, then that function is a usual (non-placement)
// deallocation function. [...]
if (getNumParams() == 1)
return true;
unsigned UsualParams = 1;
// C++ <=14 [basic.stc.dynamic.deallocation]p2:
// [...] If class T does not declare such an operator delete but does
// declare a member deallocation function named operator delete with
// exactly two parameters, the second of which has type std::size_t (18.1),
// then this function is a usual deallocation function.
//
// C++17 says a usual deallocation function is one with the signature
// (void* [, size_t] [, std::align_val_t] [, ...])
// and all such functions are usual deallocation functions. It's not clear
// that allowing varargs functions was intentional.
ASTContext &Context = getASTContext();
if (UsualParams < getNumParams() &&
Context.hasSameUnqualifiedType(getParamDecl(UsualParams)->getType(),
Context.getSizeType()))
++UsualParams;
if (UsualParams < getNumParams() &&
getParamDecl(UsualParams)->getType()->isAlignValT())
++UsualParams;
if (UsualParams != getNumParams())
return false;
// In C++17 onwards, all potential usual deallocation functions are actual
// usual deallocation functions.
if (Context.getLangOpts().AlignedAllocation)
return true;
// This function is a usual deallocation function if there are no
// single-parameter deallocation functions of the same kind.
DeclContext::lookup_result R = getDeclContext()->lookup(getDeclName());
for (DeclContext::lookup_result::iterator I = R.begin(), E = R.end();
I != E; ++I) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(*I))
if (FD->getNumParams() == 1)
return false;
}
return true;
}
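// Illustrative example (pre-C++17 rules, an assumption for the example):
//   struct X {
//     void operator delete(void *);              // usual
//     void operator delete(void *, std::size_t); // not usual here, since
//   };                                           // the 1-param form exists
// With C++17 aligned allocation enabled, both forms would qualify.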
bool CXXMethodDecl::isCopyAssignmentOperator() const {
// C++0x [class.copy]p17:
// A user-declared copy assignment operator X::operator= is a non-static
// non-template member function of class X with exactly one parameter of
// type X, X&, const X&, volatile X& or const volatile X&.
if (/*operator=*/getOverloadedOperator() != OO_Equal ||
/*non-static*/ isStatic() ||
/*non-template*/getPrimaryTemplate() || getDescribedFunctionTemplate() ||
getNumParams() != 1)
return false;
QualType ParamType = getParamDecl(0)->getType();
if (const LValueReferenceType *Ref = ParamType->getAs<LValueReferenceType>())
ParamType = Ref->getPointeeType();
ASTContext &Context = getASTContext();
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(getParent()));
return Context.hasSameUnqualifiedType(ClassType, ParamType);
}
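// Illustrative: 'X &operator=(const X &)' and 'X &operator=(X)' both
// satisfy this predicate; 'X &operator=(X &&)' does not (see
// isMoveAssignmentOperator below).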
bool CXXMethodDecl::isMoveAssignmentOperator() const {
// C++0x [class.copy]p19:
// A user-declared move assignment operator X::operator= is a non-static
// non-template member function of class X with exactly one parameter of type
// X&&, const X&&, volatile X&&, or const volatile X&&.
if (getOverloadedOperator() != OO_Equal || isStatic() ||
getPrimaryTemplate() || getDescribedFunctionTemplate() ||
getNumParams() != 1)
return false;
QualType ParamType = getParamDecl(0)->getType();
if (!isa<RValueReferenceType>(ParamType))
return false;
ParamType = ParamType->getPointeeType();
ASTContext &Context = getASTContext();
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(getParent()));
return Context.hasSameUnqualifiedType(ClassType, ParamType);
}
void CXXMethodDecl::addOverriddenMethod(const CXXMethodDecl *MD) {
assert(MD->isCanonicalDecl() && "Method is not canonical!");
assert(!MD->getParent()->isDependentContext() &&
"Can't add an overridden method to a class template!");
assert(MD->isVirtual() && "Method is not virtual!");
getASTContext().addOverriddenMethod(this, MD);
}
CXXMethodDecl::method_iterator CXXMethodDecl::begin_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return nullptr;
return getASTContext().overridden_methods_begin(this);
}
CXXMethodDecl::method_iterator CXXMethodDecl::end_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return nullptr;
return getASTContext().overridden_methods_end(this);
}
unsigned CXXMethodDecl::size_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return 0;
return getASTContext().overridden_methods_size(this);
}
CXXMethodDecl::overridden_method_range
CXXMethodDecl::overridden_methods() const {
if (isa<CXXConstructorDecl>(this))
return overridden_method_range(nullptr, nullptr);
return getASTContext().overridden_methods(this);
}
QualType CXXMethodDecl::getThisType(ASTContext &C) const {
// C++ 9.3.2p1: The type of this in a member function of a class X is X*.
// If the member function is declared const, the type of this is const X*,
// if the member function is declared volatile, the type of this is
// volatile X*, and if the member function is declared const volatile,
// the type of this is const volatile X*.
assert(isInstance() && "No 'this' for static methods!");
QualType ClassTy = C.getTypeDeclType(getParent());
ClassTy = C.getQualifiedType(ClassTy,
Qualifiers::fromCVRUMask(getTypeQualifiers()));
return C.getPointerType(ClassTy);
}
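// Illustrative: inside 'void X::f() const volatile', the type computed
// above for 'this' is 'const volatile X *'.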
bool CXXMethodDecl::hasInlineBody() const {
// If this function is a template instantiation, look at the template from
// which it was instantiated.
const FunctionDecl *CheckFn = getTemplateInstantiationPattern();
if (!CheckFn)
CheckFn = this;
const FunctionDecl *fn;
return CheckFn->isDefined(fn) && !fn->isOutOfLine() &&
(fn->doesThisDeclarationHaveABody() || fn->willHaveBody());
}
bool CXXMethodDecl::isLambdaStaticInvoker() const {
const CXXRecordDecl *P = getParent();
if (P->isLambda()) {
if (const CXXMethodDecl *StaticInvoker = P->getLambdaStaticInvoker()) {
if (StaticInvoker == this) return true;
if (P->isGenericLambda() && this->isFunctionTemplateSpecialization())
return StaticInvoker == this->getPrimaryTemplate()->getTemplatedDecl();
}
}
return false;
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
TypeSourceInfo *TInfo, bool IsVirtual,
SourceLocation L, Expr *Init,
SourceLocation R,
SourceLocation EllipsisLoc)
: Initializee(TInfo), MemberOrEllipsisLocation(EllipsisLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(IsVirtual),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
FieldDecl *Member,
SourceLocation MemberLoc,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
IndirectFieldDecl *Member,
SourceLocation MemberLoc,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
TypeSourceInfo *TInfo,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(TInfo), MemberOrEllipsisLocation(), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(true), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
TypeLoc CXXCtorInitializer::getBaseClassLoc() const {
if (isBaseInitializer())
return Initializee.get<TypeSourceInfo*>()->getTypeLoc();
else
return TypeLoc();
}
const Type *CXXCtorInitializer::getBaseClass() const {
if (isBaseInitializer())
return Initializee.get<TypeSourceInfo*>()->getType().getTypePtr();
else
return nullptr;
}
SourceLocation CXXCtorInitializer::getSourceLocation() const {
if (isInClassMemberInitializer())
return getAnyMember()->getLocation();
if (isAnyMemberInitializer())
return getMemberLocation();
if (TypeSourceInfo *TSInfo = Initializee.get<TypeSourceInfo*>())
return TSInfo->getTypeLoc().getLocalSourceRange().getBegin();
return SourceLocation();
}
SourceRange CXXCtorInitializer::getSourceRange() const {
if (isInClassMemberInitializer()) {
FieldDecl *D = getAnyMember();
if (Expr *I = D->getInClassInitializer())
return I->getSourceRange();
return SourceRange();
}
return SourceRange(getSourceLocation(), getRParenLoc());
}
void CXXConstructorDecl::anchor() { }
CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
unsigned ID,
bool Inherited) {
unsigned Extra = additionalSizeToAlloc<InheritedConstructor>(Inherited);
auto *Result = new (C, ID, Extra) CXXConstructorDecl(
C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
false, false, false, false, InheritedConstructor());
Result->IsInheritingConstructor = Inherited;
return Result;
}
CXXConstructorDecl *
CXXConstructorDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isExplicit, bool isInline,
bool isImplicitlyDeclared, bool isConstexpr,
InheritedConstructor Inherited) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConstructorName &&
"Name must refer to a constructor");
unsigned Extra =
additionalSizeToAlloc<InheritedConstructor>(Inherited ? 1 : 0);
return new (C, RD, Extra) CXXConstructorDecl(
C, RD, StartLoc, NameInfo, T, TInfo, isExplicit, isInline,
isImplicitlyDeclared, isConstexpr, Inherited);
}
CXXConstructorDecl::init_const_iterator CXXConstructorDecl::init_begin() const {
return CtorInitializers.get(getASTContext().getExternalSource());
}
CXXConstructorDecl *CXXConstructorDecl::getTargetConstructor() const {
assert(isDelegatingConstructor() && "Not a delegating constructor!");
Expr *E = (*init_begin())->getInit()->IgnoreImplicit();
if (CXXConstructExpr *Construct = dyn_cast<CXXConstructExpr>(E))
return Construct->getConstructor();
return nullptr;
}
bool CXXConstructorDecl::isDefaultConstructor() const {
// C++ [class.ctor]p5:
// A default constructor for a class X is a constructor of class
// X that can be called without an argument.
return (getNumParams() == 0) ||
(getNumParams() > 0 && getParamDecl(0)->hasDefaultArg());
}
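// Illustrative: both 'X()' and 'X(int = 0)' satisfy this predicate,
// since each can be called without an argument.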
bool
CXXConstructorDecl::isCopyConstructor(unsigned &TypeQuals) const {
return isCopyOrMoveConstructor(TypeQuals) &&
getParamDecl(0)->getType()->isLValueReferenceType();
}
bool CXXConstructorDecl::isMoveConstructor(unsigned &TypeQuals) const {
return isCopyOrMoveConstructor(TypeQuals) &&
getParamDecl(0)->getType()->isRValueReferenceType();
}
/// \brief Determine whether this is a copy or move constructor.
bool CXXConstructorDecl::isCopyOrMoveConstructor(unsigned &TypeQuals) const {
// C++ [class.copy]p2:
// A non-template constructor for class X is a copy constructor
// if its first parameter is of type X&, const X&, volatile X& or
// const volatile X&, and either there are no other parameters
// or else all other parameters have default arguments (8.3.6).
// C++0x [class.copy]p3:
// A non-template constructor for class X is a move constructor if its
// first parameter is of type X&&, const X&&, volatile X&&, or
// const volatile X&&, and either there are no other parameters or else
// all other parameters have default arguments.
if ((getNumParams() < 1) ||
(getNumParams() > 1 && !getParamDecl(1)->hasDefaultArg()) ||
(getPrimaryTemplate() != nullptr) ||
(getDescribedFunctionTemplate() != nullptr))
return false;
const ParmVarDecl *Param = getParamDecl(0);
// Do we have a reference type?
const ReferenceType *ParamRefType = Param->getType()->getAs<ReferenceType>();
if (!ParamRefType)
return false;
// Is it a reference to our class type?
ASTContext &Context = getASTContext();
CanQualType PointeeType
= Context.getCanonicalType(ParamRefType->getPointeeType());
CanQualType ClassTy
= Context.getCanonicalType(Context.getTagDeclType(getParent()));
if (PointeeType.getUnqualifiedType() != ClassTy)
return false;
// FIXME: other qualifiers?
// We have a copy or move constructor.
TypeQuals = PointeeType.getCVRQualifiers();
return true;
}
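// Illustrative: 'X(const X &, int = 0)' is a copy constructor and sets
// TypeQuals to Qualifiers::Const; 'X(X &&)' is a move constructor with
// TypeQuals of 0.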
bool CXXConstructorDecl::isConvertingConstructor(bool AllowExplicit) const {
// C++ [class.conv.ctor]p1:
// A constructor declared without the function-specifier explicit
// that can be called with a single parameter specifies a
// conversion from the type of its first parameter to the type of
// its class. Such a constructor is called a converting
// constructor.
if (isExplicit() && !AllowExplicit)
return false;
return (getNumParams() == 0 &&
getType()->getAs<FunctionProtoType>()->isVariadic()) ||
(getNumParams() == 1) ||
(getNumParams() > 1 &&
(getParamDecl(1)->hasDefaultArg() ||
getParamDecl(1)->isParameterPack()));
}
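// Illustrative: given 'struct X { X(int); explicit X(double); };',
// X(int) is a converting constructor, while X(double) qualifies only
// when AllowExplicit is true.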
bool CXXConstructorDecl::isSpecializationCopyingObject() const {
if ((getNumParams() < 1) ||
(getNumParams() > 1 && !getParamDecl(1)->hasDefaultArg()) ||
(getDescribedFunctionTemplate() != nullptr))
return false;
const ParmVarDecl *Param = getParamDecl(0);
ASTContext &Context = getASTContext();
CanQualType ParamType = Context.getCanonicalType(Param->getType());
// Is it the same as our class type?
CanQualType ClassTy
= Context.getCanonicalType(Context.getTagDeclType(getParent()));
if (ParamType.getUnqualifiedType() != ClassTy)
return false;
return true;
}
void CXXDestructorDecl::anchor() { }
CXXDestructorDecl *
CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID)
CXXDestructorDecl(C, nullptr, SourceLocation(), DeclarationNameInfo(),
QualType(), nullptr, false, false);
}
CXXDestructorDecl *
CXXDestructorDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isImplicitlyDeclared) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXDestructorName &&
"Name must refer to a destructor");
return new (C, RD) CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo,
isInline, isImplicitlyDeclared);
}
void CXXDestructorDecl::setOperatorDelete(FunctionDecl *OD) {
auto *First = cast<CXXDestructorDecl>(getFirstDecl());
if (OD && !First->OperatorDelete) {
First->OperatorDelete = OD;
if (auto *L = getASTMutationListener())
L->ResolvedOperatorDelete(First, OD);
}
}
void CXXConversionDecl::anchor() { }
CXXConversionDecl *
CXXConversionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXConversionDecl(C, nullptr, SourceLocation(),
DeclarationNameInfo(), QualType(),
nullptr, false, false, false,
SourceLocation());
}
CXXConversionDecl *
CXXConversionDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isExplicit,
bool isConstexpr, SourceLocation EndLocation) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConversionFunctionName &&
"Name must refer to a conversion function");
return new (C, RD) CXXConversionDecl(C, RD, StartLoc, NameInfo, T, TInfo,
isInline, isExplicit, isConstexpr,
EndLocation);
}
bool CXXConversionDecl::isLambdaToBlockPointerConversion() const {
return isImplicit() && getParent()->isLambda() &&
getConversionType()->isBlockPointerType();
}
void LinkageSpecDecl::anchor() { }
LinkageSpecDecl *LinkageSpecDecl::Create(ASTContext &C,
DeclContext *DC,
SourceLocation ExternLoc,
SourceLocation LangLoc,
LanguageIDs Lang,
bool HasBraces) {
return new (C, DC) LinkageSpecDecl(DC, ExternLoc, LangLoc, Lang, HasBraces);
}
LinkageSpecDecl *LinkageSpecDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) LinkageSpecDecl(nullptr, SourceLocation(),
SourceLocation(), lang_c, false);
}
void UsingDirectiveDecl::anchor() { }
UsingDirectiveDecl *UsingDirectiveDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
SourceLocation NamespaceLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Used,
DeclContext *CommonAncestor) {
if (NamespaceDecl *NS = dyn_cast_or_null<NamespaceDecl>(Used))
Used = NS->getOriginalNamespace();
return new (C, DC) UsingDirectiveDecl(DC, L, NamespaceLoc, QualifierLoc,
IdentLoc, Used, CommonAncestor);
}
UsingDirectiveDecl *UsingDirectiveDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) UsingDirectiveDecl(nullptr, SourceLocation(),
SourceLocation(),
NestedNameSpecifierLoc(),
SourceLocation(), nullptr, nullptr);
}
NamespaceDecl *UsingDirectiveDecl::getNominatedNamespace() {
if (NamespaceAliasDecl *NA =
dyn_cast_or_null<NamespaceAliasDecl>(NominatedNamespace))
return NA->getNamespace();
return cast_or_null<NamespaceDecl>(NominatedNamespace);
}
NamespaceDecl::NamespaceDecl(ASTContext &C, DeclContext *DC, bool Inline,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id, NamespaceDecl *PrevDecl)
: NamedDecl(Namespace, DC, IdLoc, Id), DeclContext(Namespace),
redeclarable_base(C), LocStart(StartLoc), RBraceLoc(),
AnonOrFirstNamespaceAndInline(nullptr, Inline) {
setPreviousDecl(PrevDecl);
if (PrevDecl)
AnonOrFirstNamespaceAndInline.setPointer(PrevDecl->getOriginalNamespace());
}
NamespaceDecl *NamespaceDecl::Create(ASTContext &C, DeclContext *DC,
bool Inline, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
NamespaceDecl *PrevDecl) {
return new (C, DC) NamespaceDecl(C, DC, Inline, StartLoc, IdLoc, Id,
PrevDecl);
}
NamespaceDecl *NamespaceDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) NamespaceDecl(C, nullptr, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
}
NamespaceDecl *NamespaceDecl::getOriginalNamespace() {
if (isFirstDecl())
return this;
return AnonOrFirstNamespaceAndInline.getPointer();
}
const NamespaceDecl *NamespaceDecl::getOriginalNamespace() const {
if (isFirstDecl())
return this;
return AnonOrFirstNamespaceAndInline.getPointer();
}
bool NamespaceDecl::isOriginalNamespace() const { return isFirstDecl(); }
NamespaceDecl *NamespaceDecl::getNextRedeclarationImpl() {
return getNextRedeclaration();
}
NamespaceDecl *NamespaceDecl::getPreviousDeclImpl() {
return getPreviousDecl();
}
NamespaceDecl *NamespaceDecl::getMostRecentDeclImpl() {
return getMostRecentDecl();
}
void NamespaceAliasDecl::anchor() { }
NamespaceAliasDecl *NamespaceAliasDecl::getNextRedeclarationImpl() {
return getNextRedeclaration();
}
NamespaceAliasDecl *NamespaceAliasDecl::getPreviousDeclImpl() {
return getPreviousDecl();
}
NamespaceAliasDecl *NamespaceAliasDecl::getMostRecentDeclImpl() {
return getMostRecentDecl();
}
NamespaceAliasDecl *NamespaceAliasDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Namespace) {
// FIXME: Preserve the aliased namespace as written.
if (NamespaceDecl *NS = dyn_cast_or_null<NamespaceDecl>(Namespace))
Namespace = NS->getOriginalNamespace();
return new (C, DC) NamespaceAliasDecl(C, DC, UsingLoc, AliasLoc, Alias,
QualifierLoc, IdentLoc, Namespace);
}
NamespaceAliasDecl *
NamespaceAliasDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) NamespaceAliasDecl(C, nullptr, SourceLocation(),
SourceLocation(), nullptr,
NestedNameSpecifierLoc(),
SourceLocation(), nullptr);
}
void UsingShadowDecl::anchor() { }
UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target)
: NamedDecl(K, DC, Loc, Using ? Using->getDeclName() : DeclarationName()),
redeclarable_base(C), Underlying(Target),
UsingOrNextShadow(cast<NamedDecl>(Using)) {
if (Target)
IdentifierNamespace = Target->getIdentifierNamespace();
setImplicit();
}
UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, EmptyShell Empty)
: NamedDecl(K, nullptr, SourceLocation(), DeclarationName()),
redeclarable_base(C), Underlying(), UsingOrNextShadow() {}
UsingShadowDecl *
UsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UsingShadowDecl(UsingShadow, C, EmptyShell());
}
UsingDecl *UsingShadowDecl::getUsingDecl() const {
const UsingShadowDecl *Shadow = this;
while (const UsingShadowDecl *NextShadow =
dyn_cast<UsingShadowDecl>(Shadow->UsingOrNextShadow))
Shadow = NextShadow;
return cast<UsingDecl>(Shadow->UsingOrNextShadow);
}
void ConstructorUsingShadowDecl::anchor() { }
ConstructorUsingShadowDecl *
ConstructorUsingShadowDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target, bool IsVirtual) {
return new (C, DC) ConstructorUsingShadowDecl(C, DC, Loc, Using, Target,
IsVirtual);
}
ConstructorUsingShadowDecl *
ConstructorUsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) ConstructorUsingShadowDecl(C, EmptyShell());
}
CXXRecordDecl *ConstructorUsingShadowDecl::getNominatedBaseClass() const {
return getUsingDecl()->getQualifier()->getAsRecordDecl();
}
void UsingDecl::anchor() { }
void UsingDecl::addShadowDecl(UsingShadowDecl *S) {
assert(std::find(shadow_begin(), shadow_end(), S) == shadow_end() &&
"declaration already in set");
assert(S->getUsingDecl() == this);
if (FirstUsingShadow.getPointer())
S->UsingOrNextShadow = FirstUsingShadow.getPointer();
FirstUsingShadow.setPointer(S);
}
void UsingDecl::removeShadowDecl(UsingShadowDecl *S) {
assert(std::find(shadow_begin(), shadow_end(), S) != shadow_end() &&
"declaration not in set");
assert(S->getUsingDecl() == this);
// Remove S from the shadow decl chain. This is O(n) but hopefully rare.
if (FirstUsingShadow.getPointer() == S) {
FirstUsingShadow.setPointer(
dyn_cast<UsingShadowDecl>(S->UsingOrNextShadow));
S->UsingOrNextShadow = this;
return;
}
UsingShadowDecl *Prev = FirstUsingShadow.getPointer();
while (Prev->UsingOrNextShadow != S)
Prev = cast<UsingShadowDecl>(Prev->UsingOrNextShadow);
Prev->UsingOrNextShadow = S->UsingOrNextShadow;
S->UsingOrNextShadow = this;
}
UsingDecl *UsingDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation UL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
bool HasTypename) {
return new (C, DC) UsingDecl(DC, UL, QualifierLoc, NameInfo, HasTypename);
}
UsingDecl *UsingDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UsingDecl(nullptr, SourceLocation(),
NestedNameSpecifierLoc(), DeclarationNameInfo(),
false);
}
SourceRange UsingDecl::getSourceRange() const {
SourceLocation Begin = isAccessDeclaration()
? getQualifierLoc().getBeginLoc() : UsingLocation;
return SourceRange(Begin, getNameInfo().getEndLoc());
}
void UsingPackDecl::anchor() { }
UsingPackDecl *UsingPackDecl::Create(ASTContext &C, DeclContext *DC,
NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls) {
size_t Extra = additionalSizeToAlloc<NamedDecl *>(UsingDecls.size());
return new (C, DC, Extra) UsingPackDecl(DC, InstantiatedFrom, UsingDecls);
}
UsingPackDecl *UsingPackDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions) {
size_t Extra = additionalSizeToAlloc<NamedDecl *>(NumExpansions);
auto *Result = new (C, ID, Extra) UsingPackDecl(nullptr, nullptr, None);
Result->NumExpansions = NumExpansions;
auto *Trail = Result->getTrailingObjects<NamedDecl *>();
for (unsigned I = 0; I != NumExpansions; ++I)
new (Trail + I) NamedDecl*(nullptr);
return Result;
}
void UnresolvedUsingValueDecl::anchor() { }
UnresolvedUsingValueDecl *
UnresolvedUsingValueDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
SourceLocation EllipsisLoc) {
return new (C, DC) UnresolvedUsingValueDecl(DC, C.DependentTy, UsingLoc,
QualifierLoc, NameInfo,
EllipsisLoc);
}
UnresolvedUsingValueDecl *
UnresolvedUsingValueDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UnresolvedUsingValueDecl(nullptr, QualType(),
SourceLocation(),
NestedNameSpecifierLoc(),
DeclarationNameInfo(),
SourceLocation());
}
SourceRange UnresolvedUsingValueDecl::getSourceRange() const {
SourceLocation Begin = isAccessDeclaration()
? getQualifierLoc().getBeginLoc() : UsingLocation;
return SourceRange(Begin, getNameInfo().getEndLoc());
}
void UnresolvedUsingTypenameDecl::anchor() { }
UnresolvedUsingTypenameDecl *
UnresolvedUsingTypenameDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation TypenameLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc,
DeclarationName TargetName,
SourceLocation EllipsisLoc) {
return new (C, DC) UnresolvedUsingTypenameDecl(
DC, UsingLoc, TypenameLoc, QualifierLoc, TargetNameLoc,
TargetName.getAsIdentifierInfo(), EllipsisLoc);
}
UnresolvedUsingTypenameDecl *
UnresolvedUsingTypenameDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UnresolvedUsingTypenameDecl(
nullptr, SourceLocation(), SourceLocation(), NestedNameSpecifierLoc(),
SourceLocation(), nullptr, SourceLocation());
}
void StaticAssertDecl::anchor() { }
StaticAssertDecl *StaticAssertDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation StaticAssertLoc,
Expr *AssertExpr,
StringLiteral *Message,
SourceLocation RParenLoc,
bool Failed) {
return new (C, DC) StaticAssertDecl(DC, StaticAssertLoc, AssertExpr, Message,
RParenLoc, Failed);
}
StaticAssertDecl *StaticAssertDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) StaticAssertDecl(nullptr, SourceLocation(), nullptr,
nullptr, SourceLocation(), false);
}
void BindingDecl::anchor() {}
BindingDecl *BindingDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation IdLoc, IdentifierInfo *Id) {
return new (C, DC) BindingDecl(DC, IdLoc, Id);
}
BindingDecl *BindingDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) BindingDecl(nullptr, SourceLocation(), nullptr);
}
VarDecl *BindingDecl::getHoldingVar() const {
Expr *B = getBinding();
if (!B)
return nullptr;
auto *DRE = dyn_cast<DeclRefExpr>(B->IgnoreImplicit());
if (!DRE)
return nullptr;
auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
assert(VD->isImplicit() && "holding var for binding decl not implicit");
return VD;
}
void DecompositionDecl::anchor() {}
DecompositionDecl *DecompositionDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc,
SourceLocation LSquareLoc,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC,
ArrayRef<BindingDecl *> Bindings) {
size_t Extra = additionalSizeToAlloc<BindingDecl *>(Bindings.size());
return new (C, DC, Extra)
DecompositionDecl(C, DC, StartLoc, LSquareLoc, T, TInfo, SC, Bindings);
}
DecompositionDecl *DecompositionDecl::CreateDeserialized(ASTContext &C,
unsigned ID,
unsigned NumBindings) {
size_t Extra = additionalSizeToAlloc<BindingDecl *>(NumBindings);
auto *Result = new (C, ID, Extra)
DecompositionDecl(C, nullptr, SourceLocation(), SourceLocation(),
QualType(), nullptr, StorageClass(), None);
// Set up and clean out the bindings array.
Result->NumBindings = NumBindings;
auto *Trail = Result->getTrailingObjects<BindingDecl *>();
for (unsigned I = 0; I != NumBindings; ++I)
new (Trail + I) BindingDecl*(nullptr);
return Result;
}
void DecompositionDecl::printName(llvm::raw_ostream &os) const {
os << '[';
bool Comma = false;
for (auto *B : bindings()) {
if (Comma)
os << ", ";
B->printName(os);
Comma = true;
}
os << ']';
}
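// Illustrative: for 'auto [a, b] = p;' this prints "[a, b]".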
MSPropertyDecl *MSPropertyDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName N,
QualType T, TypeSourceInfo *TInfo,
SourceLocation StartL,
IdentifierInfo *Getter,
IdentifierInfo *Setter) {
return new (C, DC) MSPropertyDecl(DC, L, N, T, TInfo, StartL, Getter, Setter);
}
MSPropertyDecl *MSPropertyDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) MSPropertyDecl(nullptr, SourceLocation(),
DeclarationName(), QualType(), nullptr,
SourceLocation(), nullptr, nullptr);
}
static const char *getAccessName(AccessSpecifier AS) {
switch (AS) {
case AS_none:
llvm_unreachable("Invalid access specifier!");
case AS_public:
return "public";
case AS_private:
return "private";
case AS_protected:
return "protected";
}
llvm_unreachable("Invalid access specifier!");
}
const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
AccessSpecifier AS) {
return DB << getAccessName(AS);
}
const PartialDiagnostic &clang::operator<<(const PartialDiagnostic &DB,
AccessSpecifier AS) {
return DB << getAccessName(AS);
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp (revision 322855)
@@ -1,333 +1,304 @@
//===----- CGCXXABI.cpp - Interface to C++ ABIs ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides an abstract class for C++ code generation. Concrete subclasses
// of this implement code generation for specific C++ ABIs.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
using namespace clang;
using namespace CodeGen;
CGCXXABI::~CGCXXABI() { }
void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet compile %0 in this ABI");
Diags.Report(CGF.getContext().getFullLoc(CGF.CurCodeDecl->getLocation()),
DiagID)
<< S;
}
bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const {
- // If RD has a non-trivial move or copy constructor, we cannot copy the
- // argument.
- if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor())
- return false;
-
- // If RD has a non-trivial destructor, we cannot copy the argument.
- if (RD->hasNonTrivialDestructor())
- return false;
-
// We can only copy the argument if there exists at least one trivial,
// non-deleted copy or move constructor.
- // FIXME: This assumes that all lazily declared copy and move constructors are
- // not deleted. This assumption might not be true in some corner cases.
- bool CopyDeleted = false;
- bool MoveDeleted = false;
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor() || CD->isMoveConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy or move ctor. Return
- // directly.
- if (!CD->isDeleted())
- return true;
- if (CD->isCopyConstructor())
- CopyDeleted = true;
- else
- MoveDeleted = true;
- }
- }
-
- // If all trivial copy and move constructors are deleted, we cannot copy the
- // argument.
- return !(CopyDeleted && MoveDeleted);
+ return RD->canPassInRegisters();
}
llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) {
return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T));
}
llvm::Type *
CGCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
return CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
}
CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address This,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemPtr, const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "calls through member pointers");
ThisPtrForCall = This.getPointer();
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo());
return CGCallee::forDirect(FnPtr, FPT);
}
llvm::Value *
CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "loads of member pointers");
llvm::Type *Ty = CGF.ConvertType(MPT->getPointeeType())
->getPointerTo(Base.getAddressSpace());
return llvm::Constant::getNullValue(Ty);
}
llvm::Value *CGCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) {
ErrorUnsupportedABI(CGF, "member function pointer conversions");
return GetBogusMemberPointer(E->getType());
}
llvm::Constant *CGCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) {
return GetBogusMemberPointer(E->getType());
}
llvm::Value *
CGCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
ErrorUnsupportedABI(CGF, "member function pointer comparison");
return CGF.Builder.getFalse();
}
llvm::Value *
CGCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "member function pointer null testing");
return CGF.Builder.getFalse();
}
llvm::Constant *
CGCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
return GetBogusMemberPointer(QualType(MPT, 0));
}
llvm::Constant *CGCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
return GetBogusMemberPointer(CGM.getContext().getMemberPointerType(
MD->getType(), MD->getParent()->getTypeForDecl()));
}
llvm::Constant *CGCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
return GetBogusMemberPointer(QualType(MPT, 0));
}
llvm::Constant *CGCXXABI::EmitMemberPointer(const APValue &MP, QualType MPT) {
return GetBogusMemberPointer(MPT);
}
bool CGCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
// Fake answer.
return true;
}
void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
// FIXME: I'm not entirely sure I like using a fake decl just for code
// generation. Maybe we can come up with a better way?
auto *ThisDecl = ImplicitParamDecl::Create(
CGM.getContext(), nullptr, MD->getLocation(),
&CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
ImplicitParamDecl::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
// Compute the presumed alignment of 'this', which basically comes
// down to whether we know it's a complete object or not.
auto &Layout = CGF.getContext().getASTRecordLayout(MD->getParent());
if (MD->getParent()->getNumVBases() == 0 || // avoid vcall in common case
MD->getParent()->hasAttr<FinalAttr>() ||
!isThisCompleteObject(CGF.CurGD)) {
CGF.CXXABIThisAlignment = Layout.getAlignment();
} else {
CGF.CXXABIThisAlignment = Layout.getNonVirtualAlignment();
}
}
void CGCXXABI::EmitThisParam(CodeGenFunction &CGF) {
/// Initialize the 'this' slot.
assert(getThisDecl(CGF) && "no 'this' variable for function");
CGF.CXXABIThisValue
= CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)),
"this");
}
void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
RValue RV, QualType ResultType) {
CGF.EmitReturnOfRValue(RV, ResultType);
}
CharUnits CGCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) {
if (!requiresArrayCookie(expr))
return CharUnits::Zero();
return getArrayCookieSizeImpl(expr->getAllocatedType());
}
CharUnits CGCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// BOGUS
return CharUnits::Zero();
}
Address CGCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) {
// Should never be called.
ErrorUnsupportedABI(CGF, "array cookie initialization");
return Address::invalid();
}
bool CGCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) {
// If the class's usual deallocation function takes two arguments,
// it needs a cookie.
if (expr->doesUsualArrayDeleteWantSize())
return true;
return elementType.isDestructedType();
}
bool CGCXXABI::requiresArrayCookie(const CXXNewExpr *expr) {
// If the class's usual deallocation function takes two arguments,
// it needs a cookie.
if (expr->doesUsualArrayDeleteWantSize())
return true;
return expr->getAllocatedType().isDestructedType();
}
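// Illustrative example (editorial sketch): for 'struct T { ~T(); };',
// 'new T[n]' needs a cookie recording n so that 'delete[] p' can run n
// destructors; 'new int[n]' typically needs none, since int is trivially
// destructible and the usual operator delete[] takes no size argument.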
void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr,
const CXXDeleteExpr *expr, QualType eltTy,
llvm::Value *&numElements,
llvm::Value *&allocPtr, CharUnits &cookieSize) {
// Derive a char* in the same address space as the pointer.
ptr = CGF.Builder.CreateElementBitCast(ptr, CGF.Int8Ty);
// If we don't need an array cookie, bail out early.
if (!requiresArrayCookie(expr, eltTy)) {
allocPtr = ptr.getPointer();
numElements = nullptr;
cookieSize = CharUnits::Zero();
return;
}
cookieSize = getArrayCookieSizeImpl(eltTy);
Address allocAddr =
CGF.Builder.CreateConstInBoundsByteGEP(ptr, -cookieSize);
allocPtr = allocAddr.getPointer();
numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize);
}
llvm::Value *CGCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address ptr,
CharUnits cookieSize) {
ErrorUnsupportedABI(CGF, "reading a new[] cookie");
return llvm::ConstantInt::get(CGF.SizeTy, 0);
}
/// Returns the adjustment, in bytes, required for the given
/// member-pointer operation. Returns null if no adjustment is
/// required.
llvm::Constant *CGCXXABI::getMemberPointerAdjustment(const CastExpr *E) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer);
QualType derivedType;
if (E->getCastKind() == CK_DerivedToBaseMemberPointer)
derivedType = E->getSubExpr()->getType();
else
derivedType = E->getType();
const CXXRecordDecl *derivedClass =
derivedType->castAs<MemberPointerType>()->getClass()->getAsCXXRecordDecl();
return CGM.GetNonVirtualBaseClassOffset(derivedClass,
E->path_begin(),
E->path_end());
}
CharUnits CGCXXABI::getMemberPointerPathAdjustment(const APValue &MP) {
// TODO: Store base specifiers in APValue member pointer paths so we can
// easily reuse CGM.GetNonVirtualBaseClassOffset().
const ValueDecl *MPD = MP.getMemberPointerDecl();
CharUnits ThisAdjustment = CharUnits::Zero();
ArrayRef<const CXXRecordDecl*> Path = MP.getMemberPointerPath();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPD->getDeclContext());
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = RD;
const CXXRecordDecl *Derived = Path[I];
if (DerivedMember)
std::swap(Base, Derived);
ThisAdjustment +=
getContext().getASTRecordLayout(Derived).getBaseClassOffset(Base);
RD = Path[I];
}
if (DerivedMember)
ThisAdjustment = -ThisAdjustment;
return ThisAdjustment;
}
llvm::BasicBlock *
CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
if (CGM.getTarget().getCXXABI().hasConstructorVariants())
llvm_unreachable("shouldn't be called in this ABI");
ErrorUnsupportedABI(CGF, "complete object detection in ctor");
return nullptr;
}
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
return false;
}
llvm::CallInst *
CGCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) {
// Just call std::terminate and ignore the violating exception.
return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
}
CatchTypeInfo CGCXXABI::getCatchAllTypeInfo() {
return CatchTypeInfo{nullptr, 0};
}
std::vector<CharUnits> CGCXXABI::getVBPtrOffsets(const CXXRecordDecl *RD) {
return std::vector<CharUnits>();
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp (revision 322855)
@@ -1,4007 +1,4002 @@
//===------- ItaniumCXXABI.cpp - Emit LLVM Code from ASTs for a Module ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides C++ code generation targeting the Itanium C++ ABI. The class
// in this file generates structures that follow the Itanium C++ ABI, which is
// documented at:
// http://www.codesourcery.com/public/cxx-abi/abi.html
// http://www.codesourcery.com/public/cxx-abi/abi-eh.html
//
// It also supports the closely-related ARM ABI, documented at:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0041c/IHI0041C_cppabi.pdf
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CGVTables.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/Type.h"
#include "clang/AST/StmtCXX.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
using namespace clang;
using namespace CodeGen;
namespace {
class ItaniumCXXABI : public CodeGen::CGCXXABI {
/// VTables - All the vtables which have been defined.
llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables;
protected:
bool UseARMMethodPtrABI;
bool UseARMGuardVarABI;
bool Use32BitVTableOffsetABI;
ItaniumMangleContext &getMangleContext() {
return cast<ItaniumMangleContext>(CodeGen::CGCXXABI::getMangleContext());
}
public:
ItaniumCXXABI(CodeGen::CodeGenModule &CGM,
bool UseARMMethodPtrABI = false,
bool UseARMGuardVarABI = false) :
CGCXXABI(CGM), UseARMMethodPtrABI(UseARMMethodPtrABI),
UseARMGuardVarABI(UseARMGuardVarABI),
Use32BitVTableOffsetABI(false) { }
bool classifyReturnType(CGFunctionInfo &FI) const override;
RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
- // Structures with either a non-trivial destructor or a non-trivial
- // copy constructor are always indirect.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
+ // If C++ prohibits us from making a copy, pass by address.
+ if (!canCopyArgument(RD))
return RAA_Indirect;
return RAA_Default;
}
bool isThisCompleteObject(GlobalDecl GD) const override {
// The Itanium ABI has separate complete-object vs. base-object
// variants of both constructors and destructors.
if (isa<CXXDestructorDecl>(GD.getDecl())) {
switch (GD.getDtorType()) {
case Dtor_Complete:
case Dtor_Deleting:
return true;
case Dtor_Base:
return false;
case Dtor_Comdat:
llvm_unreachable("emitting dtor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
if (isa<CXXConstructorDecl>(GD.getDecl())) {
switch (GD.getCtorType()) {
case Ctor_Complete:
return true;
case Ctor_Base:
return false;
case Ctor_CopyingClosure:
case Ctor_DefaultClosure:
llvm_unreachable("closure ctors in Itanium ABI?");
case Ctor_Comdat:
llvm_unreachable("emitting ctor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
// No other kinds.
return false;
}
bool isZeroInitializable(const MemberPointerType *MPT) override;
llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
CGCallee
EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
const Expr *E,
Address This,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemFnPtr,
const MemberPointerType *MPT) override;
llvm::Value *
EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) override;
llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) override;
llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) override;
llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
llvm::Constant *BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment);
llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L, llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) override;
llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *Addr,
const MemberPointerType *MPT) override;
void emitVirtualObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE,
Address Ptr, QualType ElementType,
const CXXDestructorDecl *Dtor) override;
CharUnits getAlignmentOfExnObject() {
unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment();
return CGM.getContext().toCharUnitsFromBits(Align);
}
void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
llvm::CallInst *
emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) override;
void EmitFundamentalRTTIDescriptor(QualType Type, bool DLLExport);
void EmitFundamentalRTTIDescriptors(bool DLLExport);
llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
CatchTypeInfo
getAddrOfCXXCatchHandlerType(QualType Ty,
QualType CatchHandlerType) override {
return CatchTypeInfo{getAddrOfRTTIDescriptor(Ty), 0};
}
bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
void EmitBadTypeidCall(CodeGenFunction &CGF) override;
llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) override;
bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) override;
llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
llvm::BasicBlock *CastEnd) override;
llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) override;
bool EmitBadCastCall(CodeGenFunction &CGF) override;
llvm::Value *
GetVirtualBaseClassOffset(CodeGenFunction &CGF, Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) override;
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
AddedStructorArgs
buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
// Itanium does not emit any destructor variant as an inline thunk.
// Delegating may occur as an optimization, but all variants are emitted
// either with external linkage or as linkonce if they are inline and used.
return false;
}
void EmitCXXDestructors(const CXXDestructorDecl *D) override;
void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
FunctionArgList &Params) override;
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) override;
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
bool isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) override;
bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override {
return true;
}
llvm::Constant *
getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase) override;
llvm::Value *getVTableAddressPointInStructorWithVTT(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase);
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) override;
CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
Address This, llvm::Type *Ty,
SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType,
Address This,
const CXXMemberCallExpr *CE) override;
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD,
bool ReturnAdjustment) override {
// Allow inlining of thunks by emitting them with available_externally
// linkage together with vtables when needed.
if (ForVTable && !Thunk->hasLocalLinkage())
Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) override;
size_t getSrcArgforCopyCtor(const CXXConstructorDecl *,
FunctionArgList &Args) const override {
assert(!Args.empty() && "expected the arglist to not be empty!");
return Args.size() - 1;
}
StringRef GetPureVirtualCallName() override { return "__cxa_pure_virtual"; }
StringRef GetDeletedVirtualCallName() override
{ return "__cxa_deleted_virtual"; }
CharUnits getArrayCookieSizeImpl(QualType elementType) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
bool PerformInit) override;
void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *dtor, llvm::Constant *addr) override;
llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Value *Val);
void EmitThreadLocalInitFuncs(
CodeGenModule &CGM,
ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
bool usesThreadWrapperFunction() const override { return true; }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
bool NeedsVTTParameter(GlobalDecl GD) override;
/**************************** RTTI Uniqueness ******************************/
protected:
/// Returns true if the ABI requires RTTI type_info objects to be unique
/// across a program.
virtual bool shouldRTTIBeUnique() const { return true; }
public:
/// What sort of unique-RTTI behavior should we use?
enum RTTIUniquenessKind {
/// We are guaranteeing, or need to guarantee, that the RTTI string
/// is unique.
RUK_Unique,
/// We are not guaranteeing uniqueness for the RTTI string, so we
/// can demote to hidden visibility but must use string comparisons.
RUK_NonUniqueHidden,
/// We are not guaranteeing uniqueness for the RTTI string, so we
/// have to use string comparisons, but we also have to emit it with
/// non-hidden visibility.
RUK_NonUniqueVisible
};
/// Return the required visibility status for the given type and linkage in
/// the current ABI.
RTTIUniquenessKind
classifyRTTIUniqueness(QualType CanTy,
llvm::GlobalValue::LinkageTypes Linkage) const;
friend class ItaniumRTTIBuilder;
void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
private:
bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const {
const auto &VtableLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
for (const auto &VtableComponent : VtableLayout.vtable_components()) {
// Skip empty slot.
if (!VtableComponent.isUsedFunctionPointerKind())
continue;
const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
if (!Method->getCanonicalDecl()->isInlined())
continue;
StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl());
auto *Entry = CGM.GetGlobalValue(Name);
// This checks whether a virtual inline function has already been emitted.
// Note that it is possible that this inline function would be emitted
// after trying to emit vtable speculatively. Because of this we do
// an extra pass after emitting all deferred vtables to find and emit
// these vtables opportunistically.
if (!Entry || Entry->isDeclaration())
return true;
}
return false;
}
bool isVTableHidden(const CXXRecordDecl *RD) const {
const auto &VtableLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
for (const auto &VtableComponent : VtableLayout.vtable_components()) {
if (VtableComponent.isRTTIKind()) {
const CXXRecordDecl *RTTIDecl = VtableComponent.getRTTIDecl();
if (RTTIDecl->getVisibility() == Visibility::HiddenVisibility)
return true;
} else if (VtableComponent.isUsedFunctionPointerKind()) {
const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
if (Method->getVisibility() == Visibility::HiddenVisibility &&
!Method->isDefined())
return true;
}
}
return false;
}
};
class ARMCXXABI : public ItaniumCXXABI {
public:
ARMCXXABI(CodeGen::CodeGenModule &CGM) :
ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ true) {}
bool HasThisReturn(GlobalDecl GD) const override {
return (isa<CXXConstructorDecl>(GD.getDecl()) || (
isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting));
}
void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV,
QualType ResTy) override;
CharUnits getArrayCookieSizeImpl(QualType elementType) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, Address allocPtr,
CharUnits cookieSize) override;
};
class iOS64CXXABI : public ARMCXXABI {
public:
iOS64CXXABI(CodeGen::CodeGenModule &CGM) : ARMCXXABI(CGM) {
Use32BitVTableOffsetABI = true;
}
// ARM64 libraries are prepared for non-unique RTTI.
bool shouldRTTIBeUnique() const override { return false; }
};
class WebAssemblyCXXABI final : public ItaniumCXXABI {
public:
explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM)
: ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
/*UseARMGuardVarABI=*/true) {}
private:
bool HasThisReturn(GlobalDecl GD) const override {
return isa<CXXConstructorDecl>(GD.getDecl()) ||
(isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting);
}
bool canCallMismatchedFunctionType() const override { return false; }
};
}
CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
switch (CGM.getTarget().getCXXABI().getKind()) {
// For IR-generation purposes, there's no significant difference
// between the ARM and iOS ABIs.
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
case TargetCXXABI::WatchOS:
return new ARMCXXABI(CGM);
case TargetCXXABI::iOS64:
return new iOS64CXXABI(CGM);
// Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't
// include the other 32-bit ARM oddities: constructor/destructor return values
// and array cookies.
case TargetCXXABI::GenericAArch64:
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ true);
case TargetCXXABI::GenericMIPS:
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true);
case TargetCXXABI::WebAssembly:
return new WebAssemblyCXXABI(CGM);
case TargetCXXABI::GenericItanium:
if (CGM.getContext().getTargetInfo().getTriple().getArch()
== llvm::Triple::le32) {
// For PNaCl, use ARM-style method pointers so that PNaCl code
// does not assume anything about the alignment of function
// pointers.
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ false);
}
return new ItaniumCXXABI(CGM);
case TargetCXXABI::Microsoft:
llvm_unreachable("Microsoft ABI is not Itanium-based");
}
llvm_unreachable("bad ABI kind");
}
llvm::Type *
ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
if (MPT->isMemberDataPointer())
return CGM.PtrDiffTy;
return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy);
}
/// In the Itanium and ARM ABIs, method pointers have the form:
/// struct { ptrdiff_t ptr; ptrdiff_t adj; } memptr;
///
/// In the Itanium ABI:
/// - method pointers are virtual if (memptr.ptr & 1) is nonzero
/// - the this-adjustment is (memptr.adj)
/// - the virtual offset is (memptr.ptr - 1)
///
/// In the ARM ABI:
/// - method pointers are virtual if (memptr.adj & 1) is nonzero
/// - the this-adjustment is (memptr.adj >> 1)
/// - the virtual offset is (memptr.ptr)
/// ARM uses 'adj' for the virtual flag because Thumb functions
/// may be only single-byte aligned.
///
/// If the member is virtual, the adjusted 'this' pointer points
/// to a vtable pointer from which the virtual offset is applied.
///
/// If the member is non-virtual, memptr.ptr is the address of
/// the function to call.
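// Worked example (editorial sketch; assumes 8-byte pointers and a zero
// this-adjustment):
//   struct S { virtual void f(); void g(); };
//   auto pf = &S::f; // virtual, at vtable byte offset 0
//   auto pg = &S::g; // non-virtual
// Itanium: pf = { ptr = 0 + 1 = 1, adj = 0 }
//          pg = { ptr = (ptrdiff_t)address-of-S::g, adj = 0 }
// ARM:     pf = { ptr = 0, adj = 2*0 + 1 = 1 }
//          pg = { ptr = (ptrdiff_t)address-of-S::g, adj = 2*0 = 0 }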
CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address ThisAddr,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemFnPtr, const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
llvm::Constant *ptrdiff_1 = llvm::ConstantInt::get(CGM.PtrDiffTy, 1);
llvm::BasicBlock *FnVirtual = CGF.createBasicBlock("memptr.virtual");
llvm::BasicBlock *FnNonVirtual = CGF.createBasicBlock("memptr.nonvirtual");
llvm::BasicBlock *FnEnd = CGF.createBasicBlock("memptr.end");
// Extract memptr.adj, which is in the second field.
llvm::Value *RawAdj = Builder.CreateExtractValue(MemFnPtr, 1, "memptr.adj");
// Compute the true adjustment.
llvm::Value *Adj = RawAdj;
if (UseARMMethodPtrABI)
Adj = Builder.CreateAShr(Adj, ptrdiff_1, "memptr.adj.shifted");
// Apply the adjustment and cast back to the original struct type
// for consistency.
llvm::Value *This = ThisAddr.getPointer();
llvm::Value *Ptr = Builder.CreateBitCast(This, Builder.getInt8PtrTy());
Ptr = Builder.CreateInBoundsGEP(Ptr, Adj);
This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted");
ThisPtrForCall = This;
// Load the function pointer.
llvm::Value *FnAsInt = Builder.CreateExtractValue(MemFnPtr, 0, "memptr.ptr");
// If the LSB in the function pointer is 1, the function pointer points to
// a virtual function.
llvm::Value *IsVirtual;
if (UseARMMethodPtrABI)
IsVirtual = Builder.CreateAnd(RawAdj, ptrdiff_1);
else
IsVirtual = Builder.CreateAnd(FnAsInt, ptrdiff_1);
IsVirtual = Builder.CreateIsNotNull(IsVirtual, "memptr.isvirtual");
Builder.CreateCondBr(IsVirtual, FnVirtual, FnNonVirtual);
// In the virtual path, the adjustment left 'This' pointing to the
// vtable of the correct base subobject. The "function pointer" is an
// offset within the vtable (+1 for the virtual flag on non-ARM).
CGF.EmitBlock(FnVirtual);
// Cast the adjusted this to a pointer to vtable pointer and load.
llvm::Type *VTableTy = Builder.getInt8PtrTy();
CharUnits VTablePtrAlign =
CGF.CGM.getDynamicOffsetAlignment(ThisAddr.getAlignment(), RD,
CGF.getPointerAlign());
llvm::Value *VTable =
CGF.GetVTablePtr(Address(This, VTablePtrAlign), VTableTy, RD);
// Apply the offset.
// On ARM64, to reserve extra space in virtual member function pointers,
// we only pay attention to the low 32 bits of the offset.
llvm::Value *VTableOffset = FnAsInt;
if (!UseARMMethodPtrABI)
VTableOffset = Builder.CreateSub(VTableOffset, ptrdiff_1);
if (Use32BitVTableOffsetABI) {
VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty);
VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy);
}
VTable = Builder.CreateGEP(VTable, VTableOffset);
// Load the virtual function to call.
VTable = Builder.CreateBitCast(VTable, FTy->getPointerTo()->getPointerTo());
llvm::Value *VirtualFn =
Builder.CreateAlignedLoad(VTable, CGF.getPointerAlign(),
"memptr.virtualfn");
CGF.EmitBranch(FnEnd);
// In the non-virtual path, the 'ptr' field already holds the address
// of the function to call.
CGF.EmitBlock(FnNonVirtual);
llvm::Value *NonVirtualFn =
Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn");
// We're done.
CGF.EmitBlock(FnEnd);
llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2);
CalleePtr->addIncoming(VirtualFn, FnVirtual);
CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual);
CGCallee Callee(FPT, CalleePtr);
return Callee;
}
/// Compute an l-value by applying the given pointer-to-member to a
/// base object.
llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(
CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MemPtr->getType() == CGM.PtrDiffTy);
CGBuilderTy &Builder = CGF.Builder;
// Cast to char*.
Base = Builder.CreateElementBitCast(Base, CGF.Int8Ty);
// Apply the offset, which we assume is non-null.
llvm::Value *Addr =
Builder.CreateInBoundsGEP(Base.getPointer(), MemPtr, "memptr.offset");
// Cast the address to the appropriate pointer type, adopting the
// address space of the base pointer.
llvm::Type *PType = CGF.ConvertTypeForMem(MPT->getPointeeType())
->getPointerTo(Base.getAddressSpace());
return Builder.CreateBitCast(Addr, PType);
}
/// Perform a bitcast, derived-to-base, or base-to-derived member pointer
/// conversion.
///
/// Bitcast conversions are always a no-op under Itanium.
///
/// Obligatory offset/adjustment diagram:
/// <-- offset --> <-- adjustment -->
/// |--------------------------|----------------------|--------------------|
/// ^Derived address point ^Base address point ^Member address point
///
/// So when converting a base member pointer to a derived member pointer,
/// we add the offset to the adjustment because the address point has
/// decreased; and conversely, when converting a derived MP to a base MP
/// we subtract the offset from the adjustment because the address point
/// has increased.
///
/// The standard forbids (at compile time) conversion to and from
/// virtual bases, which is why we don't have to consider them here.
///
/// The standard forbids (at run time) casting a derived MP to a base
/// MP when the derived MP does not point to a member of the base.
/// This is why -1 is a reasonable choice for null data member
/// pointers.
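// Worked example (editorial sketch; assumes 4-byte int and a typical
// Itanium layout placing B at byte offset 4 inside D):
//   struct A { int a; }; struct B { int b; };
//   struct D : A, B {};
// Converting the 'int B::*' value 0 (i.e. &B::b) to 'int D::*' adds the
// base offset: 0 + 4 = 4, so &D::b is 4 bytes into D. The null value -1
// is preserved unchanged by the null check in the code below.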
llvm::Value *
ItaniumCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Under Itanium, reinterprets don't require any additional processing.
if (E->getCastKind() == CK_ReinterpretMemberPointer) return src;
// Use constant emission if we can.
if (isa<llvm::Constant>(src))
return EmitMemberPointerConversion(E, cast<llvm::Constant>(src));
llvm::Constant *adj = getMemberPointerAdjustment(E);
if (!adj) return src;
CGBuilderTy &Builder = CGF.Builder;
bool isDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
const MemberPointerType *destTy =
E->getType()->castAs<MemberPointerType>();
// For member data pointers, this is just a matter of adding the
// offset if the source is non-null.
if (destTy->isMemberDataPointer()) {
llvm::Value *dst;
if (isDerivedToBase)
dst = Builder.CreateNSWSub(src, adj, "adj");
else
dst = Builder.CreateNSWAdd(src, adj, "adj");
// Null check.
llvm::Value *null = llvm::Constant::getAllOnesValue(src->getType());
llvm::Value *isNull = Builder.CreateICmpEQ(src, null, "memptr.isnull");
return Builder.CreateSelect(isNull, src, dst);
}
// The this-adjustment is left-shifted by 1 on ARM.
if (UseARMMethodPtrABI) {
uint64_t offset = cast<llvm::ConstantInt>(adj)->getZExtValue();
offset <<= 1;
adj = llvm::ConstantInt::get(adj->getType(), offset);
}
llvm::Value *srcAdj = Builder.CreateExtractValue(src, 1, "src.adj");
llvm::Value *dstAdj;
if (isDerivedToBase)
dstAdj = Builder.CreateNSWSub(srcAdj, adj, "adj");
else
dstAdj = Builder.CreateNSWAdd(srcAdj, adj, "adj");
return Builder.CreateInsertValue(src, dstAdj, 1);
}
llvm::Constant *
ItaniumCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Under Itanium, reinterprets don't require any additional processing.
if (E->getCastKind() == CK_ReinterpretMemberPointer) return src;
// If the adjustment is trivial, we don't need to do anything.
llvm::Constant *adj = getMemberPointerAdjustment(E);
if (!adj) return src;
bool isDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
const MemberPointerType *destTy =
E->getType()->castAs<MemberPointerType>();
// For member data pointers, this is just a matter of adding the
// offset if the source is non-null.
if (destTy->isMemberDataPointer()) {
// null maps to null.
if (src->isAllOnesValue()) return src;
if (isDerivedToBase)
return llvm::ConstantExpr::getNSWSub(src, adj);
else
return llvm::ConstantExpr::getNSWAdd(src, adj);
}
// The this-adjustment is left-shifted by 1 on ARM.
if (UseARMMethodPtrABI) {
uint64_t offset = cast<llvm::ConstantInt>(adj)->getZExtValue();
offset <<= 1;
adj = llvm::ConstantInt::get(adj->getType(), offset);
}
llvm::Constant *srcAdj = llvm::ConstantExpr::getExtractValue(src, 1);
llvm::Constant *dstAdj;
if (isDerivedToBase)
dstAdj = llvm::ConstantExpr::getNSWSub(srcAdj, adj);
else
dstAdj = llvm::ConstantExpr::getNSWAdd(srcAdj, adj);
return llvm::ConstantExpr::getInsertValue(src, dstAdj, 1);
}
llvm::Constant *
ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
// Itanium C++ ABI 2.3:
// A NULL pointer is represented as -1.
if (MPT->isMemberDataPointer())
return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true);
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0);
llvm::Constant *Values[2] = { Zero, Zero };
return llvm::ConstantStruct::getAnon(Values);
}
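// Illustrative example (editorial sketch): 'int S::*p = nullptr;' lowers
// to the ptrdiff_t constant -1, leaving 0 free to mean "member at offset
// zero"; a null member *function* pointer is the pair { ptr = 0, adj = 0 }.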
llvm::Constant *
ItaniumCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
// Itanium C++ ABI 2.3:
// A pointer to data member is an offset from the base address of
// the class object containing it, represented as a ptrdiff_t
return llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity());
}
llvm::Constant *
ItaniumCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
return BuildMemberPointer(MD, CharUnits::Zero());
}
llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment) {
assert(MD->isInstance() && "Member function must not be static!");
MD = MD->getCanonicalDecl();
CodeGenTypes &Types = CGM.getTypes();
// Get the function pointer (or index if this is a virtual function).
llvm::Constant *MemPtr[2];
if (MD->isVirtual()) {
uint64_t Index = CGM.getItaniumVTableContext().getMethodVTableIndex(MD);
const ASTContext &Context = getContext();
CharUnits PointerWidth =
Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
uint64_t VTableOffset = (Index * PointerWidth.getQuantity());
if (UseARMMethodPtrABI) {
// ARM C++ ABI 3.2.1:
// This ABI specifies that adj contains twice the this
// adjustment, plus 1 if the member function is virtual. The
// least significant bit of adj then makes exactly the same
// discrimination as the least significant bit of ptr does for
// Itanium.
MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
2 * ThisAdjustment.getQuantity() + 1);
} else {
// Itanium C++ ABI 2.3:
// For a virtual function, [the pointer field] is 1 plus the
// virtual table offset (in bytes) of the function,
// represented as a ptrdiff_t.
MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset + 1);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
ThisAdjustment.getQuantity());
}
} else {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
llvm::Type *Ty;
// Check whether the function has a computable LLVM signature.
if (Types.isFuncTypeConvertible(FPT)) {
// The function has a computable LLVM signature; use the correct type.
Ty = Types.GetFunctionType(Types.arrangeCXXMethodDeclaration(MD));
} else {
// Use an arbitrary non-function type to tell GetAddrOfFunction that the
// function type is incomplete.
Ty = CGM.PtrDiffTy;
}
llvm::Constant *addr = CGM.GetAddrOfFunction(MD, Ty);
MemPtr[0] = llvm::ConstantExpr::getPtrToInt(addr, CGM.PtrDiffTy);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
(UseARMMethodPtrABI ? 2 : 1) *
ThisAdjustment.getQuantity());
}
return llvm::ConstantStruct::getAnon(MemPtr);
}
llvm::Constant *ItaniumCXXABI::EmitMemberPointer(const APValue &MP,
QualType MPType) {
const MemberPointerType *MPT = MPType->castAs<MemberPointerType>();
const ValueDecl *MPD = MP.getMemberPointerDecl();
if (!MPD)
return EmitNullMemberPointer(MPT);
CharUnits ThisAdjustment = getMemberPointerPathAdjustment(MP);
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD))
return BuildMemberPointer(MD, ThisAdjustment);
CharUnits FieldOffset =
getContext().toCharUnitsFromBits(getContext().getFieldOffset(MPD));
return EmitMemberDataPointer(MPT, ThisAdjustment + FieldOffset);
}
/// The comparison algorithm is pretty easy: the member pointers are
/// the same if they're either bitwise identical *or* both null.
///
/// ARM is different here only because null-ness is more complicated.
llvm::Value *
ItaniumCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
CGBuilderTy &Builder = CGF.Builder;
llvm::ICmpInst::Predicate Eq;
llvm::Instruction::BinaryOps And, Or;
if (Inequality) {
Eq = llvm::ICmpInst::ICMP_NE;
And = llvm::Instruction::Or;
Or = llvm::Instruction::And;
} else {
Eq = llvm::ICmpInst::ICMP_EQ;
And = llvm::Instruction::And;
Or = llvm::Instruction::Or;
}
// Member data pointers are easy because there's a unique null
// value, so it just comes down to bitwise equality.
if (MPT->isMemberDataPointer())
return Builder.CreateICmp(Eq, L, R);
// For member function pointers, the tautologies are more complex.
// The Itanium tautology is:
// (L == R) <==> (L.ptr == R.ptr && (L.ptr == 0 || L.adj == R.adj))
// The ARM tautology is:
// (L == R) <==> (L.ptr == R.ptr &&
// (L.adj == R.adj ||
// (L.ptr == 0 && ((L.adj|R.adj) & 1) == 0)))
// The inequality tautologies have exactly the same structure, except
// applying De Morgan's laws.
llvm::Value *LPtr = Builder.CreateExtractValue(L, 0, "lhs.memptr.ptr");
llvm::Value *RPtr = Builder.CreateExtractValue(R, 0, "rhs.memptr.ptr");
// This condition tests whether L.ptr == R.ptr. This must always be
// true for equality to hold.
llvm::Value *PtrEq = Builder.CreateICmp(Eq, LPtr, RPtr, "cmp.ptr");
// This condition, together with the assumption that L.ptr == R.ptr,
// tests whether the pointers are both null. ARM imposes an extra
// condition.
llvm::Value *Zero = llvm::Constant::getNullValue(LPtr->getType());
llvm::Value *EqZero = Builder.CreateICmp(Eq, LPtr, Zero, "cmp.ptr.null");
// This condition tests whether L.adj == R.adj. If this isn't
// true, the pointers are unequal unless they're both null.
llvm::Value *LAdj = Builder.CreateExtractValue(L, 1, "lhs.memptr.adj");
llvm::Value *RAdj = Builder.CreateExtractValue(R, 1, "rhs.memptr.adj");
llvm::Value *AdjEq = Builder.CreateICmp(Eq, LAdj, RAdj, "cmp.adj");
// Null member function pointers on ARM clear the low bit of Adj,
// so the zero condition has to check that neither low bit is set.
if (UseARMMethodPtrABI) {
llvm::Value *One = llvm::ConstantInt::get(LPtr->getType(), 1);
// Compute (l.adj | r.adj) & 1 and test it against zero.
llvm::Value *OrAdj = Builder.CreateOr(LAdj, RAdj, "or.adj");
llvm::Value *OrAdjAnd1 = Builder.CreateAnd(OrAdj, One);
llvm::Value *OrAdjAnd1EqZero = Builder.CreateICmp(Eq, OrAdjAnd1, Zero,
"cmp.or.adj");
EqZero = Builder.CreateBinOp(And, EqZero, OrAdjAnd1EqZero);
}
// Tie together all our conditions.
llvm::Value *Result = Builder.CreateBinOp(Or, EqZero, AdjEq);
Result = Builder.CreateBinOp(And, PtrEq, Result,
Inequality ? "memptr.ne" : "memptr.eq");
return Result;
}
llvm::Value *
ItaniumCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
/// For member data pointers, this is just a check against -1.
if (MPT->isMemberDataPointer()) {
assert(MemPtr->getType() == CGM.PtrDiffTy);
llvm::Value *NegativeOne =
llvm::Constant::getAllOnesValue(MemPtr->getType());
return Builder.CreateICmpNE(MemPtr, NegativeOne, "memptr.tobool");
}
// In Itanium, a member function pointer is not null if 'ptr' is not null.
llvm::Value *Ptr = Builder.CreateExtractValue(MemPtr, 0, "memptr.ptr");
llvm::Constant *Zero = llvm::ConstantInt::get(Ptr->getType(), 0);
llvm::Value *Result = Builder.CreateICmpNE(Ptr, Zero, "memptr.tobool");
// On ARM, a member function pointer is also non-null if the low bit of 'adj'
// (the virtual bit) is set.
if (UseARMMethodPtrABI) {
llvm::Constant *One = llvm::ConstantInt::get(Ptr->getType(), 1);
llvm::Value *Adj = Builder.CreateExtractValue(MemPtr, 1, "memptr.adj");
llvm::Value *VirtualBit = Builder.CreateAnd(Adj, One, "memptr.virtualbit");
llvm::Value *IsVirtual = Builder.CreateICmpNE(VirtualBit, Zero,
"memptr.isvirtual");
Result = Builder.CreateOr(Result, IsVirtual);
}
return Result;
}
bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
if (!RD)
return false;
- // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) {
+ // If C++ prohibits us from making a copy, return by address.
+ if (!canCopyArgument(RD)) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
return true;
}
return false;
}
/// The Itanium ABI requires non-zero initialization only for data
/// member pointers, for which '0' is a valid offset.
bool ItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
return MPT->isMemberFunctionPointer();
}
/// The Itanium ABI always places an offset to the complete object
/// at entry -2 in the vtable.
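// Vtable prefix layout (editorial sketch, in pointer-sized slots):
//   [-2] offset-to-top   [-1] RTTI pointer   [0...] virtual functions
// Loading slot -2 through the vptr and adding it to 'this' recovers the
// complete-object pointer that must be passed to operator delete.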
void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
const CXXDeleteExpr *DE,
Address Ptr,
QualType ElementType,
const CXXDestructorDecl *Dtor) {
bool UseGlobalDelete = DE->isGlobalDelete();
if (UseGlobalDelete) {
// Derive the complete-object pointer, which is what we need
// to pass to the deallocation function.
// Grab the vtable pointer as an intptr_t*.
auto *ClassDecl =
cast<CXXRecordDecl>(ElementType->getAs<RecordType>()->getDecl());
llvm::Value *VTable =
CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl);
// Track back to entry -2 and pull out the offset there.
llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64(
VTable, -2, "complete-offset.ptr");
llvm::Value *Offset =
CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
// Apply the offset.
llvm::Value *CompletePtr =
CGF.Builder.CreateBitCast(Ptr.getPointer(), CGF.Int8PtrTy);
CompletePtr = CGF.Builder.CreateInBoundsGEP(CompletePtr, Offset);
// If we're supposed to call the global delete, make sure we do so
// even if the destructor throws.
CGF.pushCallObjectDeleteCleanup(DE->getOperatorDelete(), CompletePtr,
ElementType);
}
// FIXME: Provide a source location here even though there's no
// CXXMemberCallExpr for dtor call.
CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
if (UseGlobalDelete)
CGF.PopCleanupBlock();
}
void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
// void __cxa_rethrow();
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
llvm::Constant *Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
if (isNoReturn)
CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None);
else
CGF.EmitRuntimeCallOrInvoke(Fn);
}
static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) {
// void *__cxa_allocate_exception(size_t thrown_size);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception");
}
static llvm::Constant *getThrowFn(CodeGenModule &CGM) {
// void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
// void (*dest) (void *));
llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy };
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
}
void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
QualType ThrowType = E->getSubExpr()->getType();
// Now allocate the exception object.
llvm::Type *SizeTy = CGF.ConvertType(getContext().getSizeType());
uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity();
llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM);
llvm::CallInst *ExceptionPtr = CGF.EmitNounwindRuntimeCall(
AllocExceptionFn, llvm::ConstantInt::get(SizeTy, TypeSize), "exception");
CharUnits ExnAlign = getAlignmentOfExnObject();
CGF.EmitAnyExprToExn(E->getSubExpr(), Address(ExceptionPtr, ExnAlign));
// Now throw the exception.
llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(ThrowType,
/*ForEH=*/true);
// The address of the destructor. If the exception type has a
// trivial destructor (or isn't a record), we just pass null.
llvm::Constant *Dtor = nullptr;
if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!Record->hasTrivialDestructor()) {
CXXDestructorDecl *DtorD = Record->getDestructor();
Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
}
}
if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
llvm::Value *args[] = { ExceptionPtr, TypeInfo, Dtor };
CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args);
}
static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
// void *__dynamic_cast(const void *sub,
// const abi::__class_type_info *src,
// const abi::__class_type_info *dst,
// std::ptrdiff_t src2dst_offset);
llvm::Type *Int8PtrTy = CGF.Int8PtrTy;
llvm::Type *PtrDiffTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy };
llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false);
// Mark the function as nounwind readonly.
llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind,
llvm::Attribute::ReadOnly };
llvm::AttributeList Attrs = llvm::AttributeList::get(
CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs);
}
static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) {
// void __cxa_bad_cast();
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false);
return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast");
}
/// \brief Compute the src2dst_offset hint as described in the
/// Itanium C++ ABI [2.9.7]
static CharUnits computeOffsetHint(ASTContext &Context,
const CXXRecordDecl *Src,
const CXXRecordDecl *Dst) {
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
// If Dst is not derived from Src we can skip the whole computation below and
// return that Src is not a public base of Dst. Record all inheritance paths.
if (!Dst->isDerivedFrom(Src, Paths))
return CharUnits::fromQuantity(-2ULL);
unsigned NumPublicPaths = 0;
CharUnits Offset;
// Now walk all possible inheritance paths.
for (const CXXBasePath &Path : Paths) {
if (Path.Access != AS_public) // Ignore non-public inheritance.
continue;
++NumPublicPaths;
for (const CXXBasePathElement &PathElement : Path) {
// If the path contains a virtual base class we can't give any hint.
// -1: no hint.
if (PathElement.Base->isVirtual())
return CharUnits::fromQuantity(-1ULL);
if (NumPublicPaths > 1) // Won't use offsets, skip computation.
continue;
// Accumulate the base class offsets.
const ASTRecordLayout &L = Context.getASTRecordLayout(PathElement.Class);
Offset += L.getBaseClassOffset(
PathElement.Base->getType()->getAsCXXRecordDecl());
}
}
// -2: Src is not a public base of Dst.
if (NumPublicPaths == 0)
return CharUnits::fromQuantity(-2ULL);
// -3: Src is a multiple public base type but never a virtual base type.
if (NumPublicPaths > 1)
return CharUnits::fromQuantity(-3ULL);
// Otherwise, the Src type is a unique public nonvirtual base type of Dst.
// Return the offset of Src from the origin of Dst.
return Offset;
}
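// Illustrative hint values (editorial sketch):
//   struct A {}; struct B : A {}; struct C : A {}; struct D : B, C {};
//   A -> D gives -3 (A is a public base along more than one path);
//   struct V : virtual A {}; A -> V gives -1 (a path crosses a virtual
//   base); unrelated types give -2; a unique public non-virtual base
//   gives its byte offset within Dst.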
static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) {
// void __cxa_bad_typeid();
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false);
return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_typeid");
}
bool ItaniumCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
QualType SrcRecordTy) {
return IsDeref;
}
void ItaniumCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) {
llvm::Value *Fn = getBadTypeidFn(CGF);
CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn();
CGF.Builder.CreateUnreachable();
}
llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
llvm::Value *Value =
CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
// Load the type info.
Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
return CGF.Builder.CreateAlignedLoad(Value, CGF.getPointerAlign());
}
bool ItaniumCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) {
return SrcIsPtr;
}
llvm::Value *ItaniumCXXABI::EmitDynamicCastCall(
CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy,
QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) {
llvm::Type *PtrDiffLTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
llvm::Value *SrcRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType());
llvm::Value *DestRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType());
// Compute the offset hint.
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl();
llvm::Value *OffsetHint = llvm::ConstantInt::get(
PtrDiffLTy,
computeOffsetHint(CGF.getContext(), SrcDecl, DestDecl).getQuantity());
// Emit the call to __dynamic_cast.
llvm::Value *Value = ThisAddr.getPointer();
Value = CGF.EmitCastToVoidPtr(Value);
llvm::Value *args[] = {Value, SrcRTTI, DestRTTI, OffsetHint};
Value = CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), args);
Value = CGF.Builder.CreateBitCast(Value, DestLTy);
/// C++ [expr.dynamic.cast]p9:
/// A failed cast to reference type throws std::bad_cast
if (DestTy->isReferenceType()) {
llvm::BasicBlock *BadCastBlock =
CGF.createBasicBlock("dynamic_cast.bad_cast");
llvm::Value *IsNull = CGF.Builder.CreateIsNull(Value);
CGF.Builder.CreateCondBr(IsNull, BadCastBlock, CastEnd);
CGF.EmitBlock(BadCastBlock);
EmitBadCastCall(CGF);
}
return Value;
}
llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF,
Address ThisAddr,
QualType SrcRecordTy,
QualType DestTy) {
llvm::Type *PtrDiffLTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
// Get the vtable pointer.
llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(),
ClassDecl);
// Get the offset-to-top from the vtable.
llvm::Value *OffsetToTop =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL);
OffsetToTop =
CGF.Builder.CreateAlignedLoad(OffsetToTop, CGF.getPointerAlign(),
"offset.to.top");
// Finally, add the offset to the pointer.
llvm::Value *Value = ThisAddr.getPointer();
Value = CGF.EmitCastToVoidPtr(Value);
Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop);
return CGF.Builder.CreateBitCast(Value, DestLTy);
}
bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) {
llvm::Value *Fn = getBadCastFn(CGF);
CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn();
CGF.Builder.CreateUnreachable();
return true;
}
llvm::Value *
ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF,
Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) {
llvm::Value *VTablePtr = CGF.GetVTablePtr(This, CGM.Int8PtrTy, ClassDecl);
CharUnits VBaseOffsetOffset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(ClassDecl,
BaseClassDecl);
llvm::Value *VBaseOffsetPtr =
CGF.Builder.CreateConstGEP1_64(VTablePtr, VBaseOffsetOffset.getQuantity(),
"vbase.offset.ptr");
VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr,
CGM.PtrDiffTy->getPointerTo());
llvm::Value *VBaseOffset =
CGF.Builder.CreateAlignedLoad(VBaseOffsetPtr, CGF.getPointerAlign(),
"vbase.offset");
return VBaseOffset;
}
void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
// Just make sure we're in sync with TargetCXXABI.
assert(CGM.getTarget().getCXXABI().hasConstructorVariants());
// The constructor used for constructing this as a base class;
// ignores virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Base));
// The constructor used for constructing this as a complete class;
// constructs the virtual bases, then calls the base constructor.
if (!D->getParent()->isAbstract()) {
// We don't need to emit the complete ctor if the class is abstract.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
}
}
CGCXXABI::AddedStructorArgs
ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
// All parameters are already in place except VTT, which goes after 'this'.
// These are Clang types, so we don't need to worry about sret yet.
// Check if we need to add a VTT parameter (which has type void **).
if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) {
ArgTys.insert(ArgTys.begin() + 1,
Context.getPointerType(Context.VoidPtrTy));
return AddedStructorArgs::prefix(1);
}
return AddedStructorArgs{};
}
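// Illustrative example with hypothetical types: given
//   struct A { }; struct B : virtual A { B(); };
// the base-object constructor variant of B::B() has a VTT parameter inserted
// right after 'this', effectively 'B::B(B *this, void **vtt)', while the
// complete-object variant keeps the unmodified signature.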
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
// The destructor used for destructing this as a base class; ignores
// virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
// The destructor used for destructing this as a most-derived class;
// calls the base destructor and then destroys any virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete));
// The destructor in a virtual table is always a 'deleting'
// destructor, which calls the complete destructor and then uses the
// appropriate operator delete.
if (D->isVirtual())
CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting));
}
void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
QualType &ResTy,
FunctionArgList &Params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
// Check if we need a VTT parameter as well.
if (NeedsVTTParameter(CGF.CurGD)) {
ASTContext &Context = getContext();
// FIXME: avoid the fake decl
QualType T = Context.getPointerType(Context.VoidPtrTy);
auto *VTTDecl = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"),
T, ImplicitParamDecl::CXXVTT);
Params.insert(Params.begin() + 1, VTTDecl);
getStructorImplicitParamDecl(CGF) = VTTDecl;
}
}
void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
// Naked functions have no prolog.
if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
return;
/// Initialize the 'this' slot.
EmitThisParam(CGF);
/// Initialize the 'vtt' slot if needed.
if (getStructorImplicitParamDecl(CGF)) {
getStructorImplicitParamValue(CGF) = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), "vtt");
}
/// If this is a function that the ABI specifies returns 'this', initialize
/// the return slot to 'this' at the start of the function.
///
/// Unlike the setting of return types, this is done within the ABI
/// implementation instead of by clients of CGCXXABI because:
/// 1) getThisValue is currently protected
/// 2) in theory, an ABI could implement 'this' returns some other way;
/// HasThisReturn only specifies a contract, not the implementation
if (HasThisReturn(CGF.CurGD))
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
if (!NeedsVTTParameter(GlobalDecl(D, Type)))
return AddedStructorArgs{};
// Insert the implicit 'vtt' argument as the second argument.
llvm::Value *VTT =
CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
Args.insert(Args.begin() + 1,
CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
return AddedStructorArgs::prefix(1); // Added one arg.
}
void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) {
GlobalDecl GD(DD, Type);
llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
CGCallee Callee;
if (getContext().getLangOpts().AppleKext &&
Type != Dtor_Base && DD->isVirtual())
Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
else
Callee =
CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
DD);
CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(),
This.getPointer(), VTT, VTTTy,
nullptr, nullptr);
}
void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
llvm::GlobalVariable *VTable = getAddrOfVTable(RD, CharUnits());
if (VTable->hasInitializer())
return;
ItaniumVTableContext &VTContext = CGM.getItaniumVTableContext();
const VTableLayout &VTLayout = VTContext.getVTableLayout(RD);
llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
llvm::Constant *RTTI =
CGM.GetAddrOfRTTIDescriptor(CGM.getContext().getTagDeclType(RD));
// Create and set the initializer.
ConstantInitBuilder Builder(CGM);
auto Components = Builder.beginStruct();
CGVT.createVTableInitializer(Components, VTLayout, RTTI);
Components.finishAndSetAsInitializer(VTable);
// Set the correct linkage.
VTable->setLinkage(Linkage);
if (CGM.supportsCOMDAT() && VTable->isWeakForLinker())
VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName()));
// Set the right visibility.
CGM.setGlobalVisibility(VTable, RD);
// Use pointer alignment for the vtable. Otherwise we would align them based
// on the size of the initializer which doesn't make sense as only single
// values are read.
unsigned PAlign = CGM.getTarget().getPointerAlign(0);
VTable->setAlignment(getContext().toCharUnitsFromBits(PAlign).getQuantity());
// If this is the magic class __cxxabiv1::__fundamental_type_info,
// we will emit the typeinfo for the fundamental types. This is the
// same behaviour as GCC.
const DeclContext *DC = RD->getDeclContext();
if (RD->getIdentifier() &&
RD->getIdentifier()->isStr("__fundamental_type_info") &&
isa<NamespaceDecl>(DC) && cast<NamespaceDecl>(DC)->getIdentifier() &&
cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") &&
DC->getParent()->isTranslationUnit())
EmitFundamentalRTTIDescriptors(RD->hasAttr<DLLExportAttr>());
if (!VTable->isDeclarationForLinker())
CGM.EmitVTableTypeMetadata(VTable, VTLayout);
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) {
if (Vptr.NearestVBase == nullptr)
return false;
return NeedsVTTParameter(CGF.CurGD);
}
llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
if ((Base.getBase()->getNumVBases() || NearestVBase != nullptr) &&
NeedsVTTParameter(CGF.CurGD)) {
return getVTableAddressPointInStructorWithVTT(CGF, VTableClass, Base,
NearestVBase);
}
return getVTableAddressPoint(Base, VTableClass);
}
llvm::Constant *
ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) {
llvm::GlobalValue *VTable = getAddrOfVTable(VTableClass, CharUnits());
// Find the appropriate vtable within the vtable group, and the address point
// within that vtable.
VTableLayout::AddressPointLocation AddressPoint =
CGM.getItaniumVTableContext()
.getVTableLayout(VTableClass)
.getAddressPoint(Base);
llvm::Value *Indices[] = {
llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.VTableIndex),
llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.AddressPointIndex),
};
return llvm::ConstantExpr::getGetElementPtr(VTable->getValueType(), VTable,
Indices, /*InBounds=*/true,
/*InRangeIndex=*/1);
}
llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructorWithVTT(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
assert((Base.getBase()->getNumVBases() || NearestVBase != nullptr) &&
NeedsVTTParameter(CGF.CurGD) && "This class doesn't have VTT");
// Get the secondary vpointer index.
uint64_t VirtualPointerIndex =
CGM.getVTables().getSecondaryVirtualPointerIndex(VTableClass, Base);
/// Load the VTT.
llvm::Value *VTT = CGF.LoadCXXVTT();
if (VirtualPointerIndex)
VTT = CGF.Builder.CreateConstInBoundsGEP1_64(VTT, VirtualPointerIndex);
// And load the address point from the VTT.
return CGF.Builder.CreateAlignedLoad(VTT, CGF.getPointerAlign());
}
llvm::Constant *ItaniumCXXABI::getVTableAddressPointForConstExpr(
BaseSubobject Base, const CXXRecordDecl *VTableClass) {
return getVTableAddressPoint(Base, VTableClass);
}
llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) {
assert(VPtrOffset.isZero() && "Itanium ABI only supports zero vptr offsets");
llvm::GlobalVariable *&VTable = VTables[RD];
if (VTable)
return VTable;
// Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
getMangleContext().mangleCXXVTable(RD, Out);
const VTableLayout &VTLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage);
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
else if (RD->hasAttr<DLLExportAttr>())
VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
return VTable;
}
CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
GD = GD.getCanonicalDecl();
Ty = Ty->getPointerTo()->getPointerTo();
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent());
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
llvm::Value *VFunc;
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
MethodDecl->getParent(), VTable,
VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
} else {
CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc);
llvm::Value *VFuncPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
auto *VFuncLoad =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
// Add !invariant.load metadata to the virtual function load to indicate
// that the function pointer didn't change inside the vtable.
// It's safe to add this even without -fstrict-vtable-pointers, but it only
// helps devirtualization when two loads of the same virtual function come
// from the same vtable load, which won't happen without devirtualization
// enabled by -fstrict-vtable-pointers.
if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
CGM.getCodeGenOpts().StrictVTablePointers)
VFuncLoad->setMetadata(
llvm::LLVMContext::MD_invariant_load,
llvm::MDNode::get(CGM.getLLVMContext(),
llvm::ArrayRef<llvm::Metadata *>()));
VFunc = VFuncLoad;
}
CGCallee Callee(MethodDecl, VFunc);
return Callee;
}
llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
Address This, const CXXMemberCallExpr *CE) {
assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, getFromDtorType(DtorType));
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
CGCallee Callee =
getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty,
CE ? CE->getLocStart() : SourceLocation());
CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(),
This.getPointer(), /*ImplicitParam=*/nullptr,
QualType(), CE, nullptr);
return nullptr;
}
void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
CodeGenVTables &VTables = CGM.getVTables();
llvm::GlobalVariable *VTT = VTables.GetAddrOfVTT(RD);
VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD);
}
bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
// We don't emit available_externally vtables if we are in -fapple-kext mode
// because kext mode does not permit devirtualization.
if (CGM.getLangOpts().AppleKext)
return false;
// If there are no inline virtual functions left unemitted and the vtable is
// not hidden, it is safe to emit an available_externally copy of the
// vtable.
// FIXME: we could still emit a copy of the vtable if we
// can emit definitions of the inline functions.
return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD);
}
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
int64_t NonVirtualAdjustment,
int64_t VirtualAdjustment,
bool IsReturnAdjustment) {
if (!NonVirtualAdjustment && !VirtualAdjustment)
return InitialPtr.getPointer();
Address V = CGF.Builder.CreateElementBitCast(InitialPtr, CGF.Int8Ty);
// In a base-to-derived cast, the non-virtual adjustment is applied first.
if (NonVirtualAdjustment && !IsReturnAdjustment) {
V = CGF.Builder.CreateConstInBoundsByteGEP(V,
CharUnits::fromQuantity(NonVirtualAdjustment));
}
// Perform the virtual adjustment if we have one.
llvm::Value *ResultPtr;
if (VirtualAdjustment) {
llvm::Type *PtrDiffTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
Address VTablePtrPtr = CGF.Builder.CreateElementBitCast(V, CGF.Int8PtrTy);
llvm::Value *VTablePtr = CGF.Builder.CreateLoad(VTablePtrPtr);
llvm::Value *OffsetPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTablePtr, VirtualAdjustment);
OffsetPtr = CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo());
// Load the adjustment offset from the vtable.
llvm::Value *Offset =
CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
// Adjust our pointer.
ResultPtr = CGF.Builder.CreateInBoundsGEP(V.getPointer(), Offset);
} else {
ResultPtr = V.getPointer();
}
// In a derived-to-base conversion, the non-virtual adjustment is
// applied second.
if (NonVirtualAdjustment && IsReturnAdjustment) {
ResultPtr = CGF.Builder.CreateConstInBoundsGEP1_64(ResultPtr,
NonVirtualAdjustment);
}
// Cast back to the original type.
return CGF.Builder.CreateBitCast(ResultPtr, InitialPtr.getType());
}
llvm::Value *ItaniumCXXABI::performThisAdjustment(CodeGenFunction &CGF,
Address This,
const ThisAdjustment &TA) {
return performTypeAdjustment(CGF, This, TA.NonVirtual,
TA.Virtual.Itanium.VCallOffsetOffset,
/*IsReturnAdjustment=*/false);
}
llvm::Value *
ItaniumCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) {
return performTypeAdjustment(CGF, Ret, RA.NonVirtual,
RA.Virtual.Itanium.VBaseOffsetOffset,
/*IsReturnAdjustment=*/true);
}
void ARMCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
RValue RV, QualType ResultType) {
if (!isa<CXXDestructorDecl>(CGF.CurGD.getDecl()))
return ItaniumCXXABI::EmitReturnFromThunk(CGF, RV, ResultType);
// Destructor thunks in the ARM ABI have indeterminate results.
llvm::Type *T = CGF.ReturnValue.getElementType();
RValue Undef = RValue::get(llvm::UndefValue::get(T));
return ItaniumCXXABI::EmitReturnFromThunk(CGF, Undef, ResultType);
}
/************************** Array allocation cookies **************************/
CharUnits ItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// The array cookie is a size_t; pad that up to the element alignment.
// The cookie is actually right-justified in that space.
return std::max(CharUnits::fromQuantity(CGM.SizeSizeInBytes),
CGM.getContext().getTypeAlignInChars(elementType));
}
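// Worked example (LP64, hypothetical types): for 'new double[n]' the cookie
// is max(sizeof(size_t), alignof(double)) = 8 bytes, holding the element
// count. For an over-aligned element such as 'struct alignas(16) V { ... }',
// the cookie grows to 16 bytes with the count right-justified in the last
// 8 bytes, so the array data that follows stays suitably aligned.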
Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) {
assert(requiresArrayCookie(expr));
unsigned AS = NewPtr.getAddressSpace();
ASTContext &Ctx = getContext();
CharUnits SizeSize = CGF.getSizeSize();
// The size of the cookie.
CharUnits CookieSize =
std::max(SizeSize, Ctx.getTypeAlignInChars(ElementType));
assert(CookieSize == getArrayCookieSizeImpl(ElementType));
// Compute an offset to the cookie.
Address CookiePtr = NewPtr;
CharUnits CookieOffset = CookieSize - SizeSize;
if (!CookieOffset.isZero())
CookiePtr = CGF.Builder.CreateConstInBoundsByteGEP(CookiePtr, CookieOffset);
// Write the number of elements into the appropriate slot.
Address NumElementsPtr =
CGF.Builder.CreateElementBitCast(CookiePtr, CGF.SizeTy);
llvm::Instruction *SI = CGF.Builder.CreateStore(NumElements, NumElementsPtr);
// Handle the array cookie specially in ASan.
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
expr->getOperatorNew()->isReplaceableGlobalAllocationFunction()) {
// The store to the CookiePtr does not need to be instrumented.
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, NumElementsPtr.getType(), false);
llvm::Constant *F =
CGM.CreateRuntimeFunction(FTy, "__asan_poison_cxx_array_cookie");
CGF.Builder.CreateCall(F, NumElementsPtr.getPointer());
}
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
return CGF.Builder.CreateConstInBoundsByteGEP(NewPtr, CookieSize);
}
llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
// The element count is right-justified in the cookie.
Address numElementsPtr = allocPtr;
CharUnits numElementsOffset = cookieSize - CGF.getSizeSize();
if (!numElementsOffset.isZero())
numElementsPtr =
CGF.Builder.CreateConstInBoundsByteGEP(numElementsPtr, numElementsOffset);
unsigned AS = allocPtr.getAddressSpace();
numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy);
if (!CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) || AS != 0)
return CGF.Builder.CreateLoad(numElementsPtr);
// In asan mode emit a function call instead of a regular load and let the
// run-time deal with it: if the shadow is properly poisoned return the
// cookie, otherwise return 0 to avoid an infinite loop calling DTORs.
// We can't simply ignore this load using nosanitize metadata because
// the metadata may be lost.
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGF.SizeTy, CGF.SizeTy->getPointerTo(0), false);
llvm::Constant *F =
CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie");
return CGF.Builder.CreateCall(F, numElementsPtr.getPointer());
}
CharUnits ARMCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// ARM says that the cookie is always:
// struct array_cookie {
// std::size_t element_size; // element_size != 0
// std::size_t element_count;
// };
// But the base ABI doesn't give anything an alignment greater than
// 8, so we can dismiss this as typical ABI-author blindness to
// actual language complexity and round up to the element alignment.
return std::max(CharUnits::fromQuantity(2 * CGM.SizeSizeInBytes),
CGM.getContext().getTypeAlignInChars(elementType));
}
Address ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address newPtr,
llvm::Value *numElements,
const CXXNewExpr *expr,
QualType elementType) {
assert(requiresArrayCookie(expr));
// The cookie is always at the start of the buffer.
Address cookie = newPtr;
// The first element is the element size.
cookie = CGF.Builder.CreateElementBitCast(cookie, CGF.SizeTy);
llvm::Value *elementSize = llvm::ConstantInt::get(CGF.SizeTy,
getContext().getTypeSizeInChars(elementType).getQuantity());
CGF.Builder.CreateStore(elementSize, cookie);
// The second element is the element count.
cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1, CGF.getSizeSize());
CGF.Builder.CreateStore(numElements, cookie);
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
CharUnits cookieSize = ARMCXXABI::getArrayCookieSizeImpl(elementType);
return CGF.Builder.CreateConstInBoundsByteGEP(newPtr, cookieSize);
}
llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
// The number of elements is at offset sizeof(size_t) relative to
// the allocated pointer.
Address numElementsPtr
= CGF.Builder.CreateConstInBoundsByteGEP(allocPtr, CGF.getSizeSize());
numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy);
return CGF.Builder.CreateLoad(numElementsPtr);
}
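// Example: under this ABI, 'new T[n]' lays the cookie out at the start of
// the allocation as { size_t element_size; size_t element_count; }, so the
// count read above always sits at offset sizeof(size_t) (4 bytes on AArch32,
// 8 on AArch64) regardless of the element type's alignment.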
/*********************** Static local initialization **************************/
static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// int __cxa_guard_acquire(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy),
GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_acquire",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// void __cxa_guard_release(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_release",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// void __cxa_guard_abort(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_abort",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
namespace {
struct CallGuardAbort final : EHScopeStack::Cleanup {
llvm::GlobalVariable *Guard;
CallGuardAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitNounwindRuntimeCall(getGuardAbortFn(CGF.CGM, Guard->getType()),
Guard);
}
};
}
/// The ARM code here follows the Itanium code closely enough that we
/// just special-case it at particular places.
void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
const VarDecl &D,
llvm::GlobalVariable *var,
bool shouldPerformInit) {
CGBuilderTy &Builder = CGF.Builder;
// Inline variables that weren't instantiated from variable templates have
// partially-ordered initialization within their translation unit.
bool NonTemplateInline =
D.isInline() &&
!isTemplateInstantiation(D.getTemplateSpecializationKind());
// We only need to use thread-safe statics for local non-TLS variables and
// inline variables; other global initialization is always single-threaded
// or (through lazy dynamic loading in multiple threads) unsequenced.
bool threadsafe = getContext().getLangOpts().ThreadsafeStatics &&
(D.isLocalVarDecl() || NonTemplateInline) &&
!D.getTLSKind();
// If we have a global variable with internal linkage and thread-safe statics
// are disabled, we can just let the guard variable be of type i8.
bool useInt8GuardVariable = !threadsafe && var->hasInternalLinkage();
llvm::IntegerType *guardTy;
CharUnits guardAlignment;
if (useInt8GuardVariable) {
guardTy = CGF.Int8Ty;
guardAlignment = CharUnits::One();
} else {
// Guard variables are 64 bits in the generic ABI and size width on ARM
// (i.e. 32-bit on AArch32, 64-bit on AArch64).
if (UseARMGuardVarABI) {
guardTy = CGF.SizeTy;
guardAlignment = CGF.getSizeAlign();
} else {
guardTy = CGF.Int64Ty;
guardAlignment = CharUnits::fromQuantity(
CGM.getDataLayout().getABITypeAlignment(guardTy));
}
}
llvm::PointerType *guardPtrTy = guardTy->getPointerTo();
// Create the guard variable if we don't already have it (as we
// might if we're double-emitting this function body).
llvm::GlobalVariable *guard = CGM.getStaticLocalDeclGuardAddress(&D);
if (!guard) {
// Mangle the name for the guard.
SmallString<256> guardName;
{
llvm::raw_svector_ostream out(guardName);
getMangleContext().mangleStaticGuardVariable(&D, out);
}
// Create the guard variable with a zero-initializer.
// Just absorb linkage and visibility from the guarded variable.
guard = new llvm::GlobalVariable(CGM.getModule(), guardTy,
false, var->getLinkage(),
llvm::ConstantInt::get(guardTy, 0),
guardName.str());
guard->setVisibility(var->getVisibility());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
guard->setAlignment(guardAlignment.getQuantity());
// The ABI says: "It is suggested that it be emitted in the same COMDAT
// group as the associated data object." In practice, this doesn't work for
// non-ELF and non-Wasm object formats, so only do it for ELF and Wasm.
llvm::Comdat *C = var->getComdat();
if (!D.isLocalVarDecl() && C &&
(CGM.getTarget().getTriple().isOSBinFormatELF() ||
CGM.getTarget().getTriple().isOSBinFormatWasm())) {
guard->setComdat(C);
// An inline variable's guard function is run from the per-TU
// initialization function, not via a dedicated global ctor function, so
// we can't put it in a comdat.
if (!NonTemplateInline)
CGF.CurFn->setComdat(C);
} else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
}
CGM.setStaticLocalDeclGuardAddress(&D, guard);
}
Address guardAddr = Address(guard, guardAlignment);
// Test whether the variable has completed initialization.
//
// Itanium C++ ABI 3.3.2:
// The following is pseudo-code showing how these functions can be used:
// if (obj_guard.first_byte == 0) {
// if ( __cxa_guard_acquire (&obj_guard) ) {
// try {
// ... initialize the object ...;
// } catch (...) {
// __cxa_guard_abort (&obj_guard);
// throw;
// }
// ... queue object destructor with __cxa_atexit() ...;
// __cxa_guard_release (&obj_guard);
// }
// }
// Load the first byte of the guard variable.
llvm::LoadInst *LI =
Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
// Itanium ABI:
// An implementation supporting thread-safety on multiprocessor
// systems must also guarantee that references to the initialized
// object do not occur before the load of the initialization flag.
//
// In LLVM, we do this by marking the load Acquire.
if (threadsafe)
LI->setAtomic(llvm::AtomicOrdering::Acquire);
// For ARM, we should only check the first bit, rather than the entire byte:
//
// ARM C++ ABI 3.2.3.1:
// To support the potential use of initialization guard variables
// as semaphores that are the target of ARM SWP and LDREX/STREX
// synchronizing instructions we define a static initialization
// guard variable to be a 4-byte aligned, 4-byte word with the
// following inline access protocol.
// #define INITIALIZED 1
// if ((obj_guard & INITIALIZED) != INITIALIZED) {
// if (__cxa_guard_acquire(&obj_guard))
// ...
// }
//
// and similarly for ARM64:
//
// ARM64 C++ ABI 3.2.2:
// This ABI instead only specifies the value bit 0 of the static guard
// variable; all other bits are platform defined. Bit 0 shall be 0 when the
// variable is not initialized and 1 when it is.
llvm::Value *V =
(UseARMGuardVarABI && !useInt8GuardVariable)
? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
: LI;
llvm::Value *isUninitialized = Builder.CreateIsNull(V, "guard.uninitialized");
llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
// Check if the first byte of the guard variable is zero.
Builder.CreateCondBr(isUninitialized, InitCheckBlock, EndBlock);
CGF.EmitBlock(InitCheckBlock);
// Variables used when coping with thread-safe statics and exceptions.
if (threadsafe) {
// Call __cxa_guard_acquire.
llvm::Value *V
= CGF.EmitNounwindRuntimeCall(getGuardAcquireFn(CGM, guardPtrTy), guard);
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
Builder.CreateCondBr(Builder.CreateIsNotNull(V, "tobool"),
InitBlock, EndBlock);
// Call __cxa_guard_abort along the exceptional edge.
CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard);
CGF.EmitBlock(InitBlock);
}
// Emit the initializer and add a global destructor if appropriate.
CGF.EmitCXXGlobalVarDeclInit(D, var, shouldPerformInit);
if (threadsafe) {
// Pop the guard-abort cleanup if we pushed one.
CGF.PopCleanupBlock();
// Call __cxa_guard_release. This cannot throw.
CGF.EmitNounwindRuntimeCall(getGuardReleaseFn(CGM, guardPtrTy),
guardAddr.getPointer());
} else {
Builder.CreateStore(llvm::ConstantInt::get(guardTy, 1), guardAddr);
}
CGF.EmitBlock(EndBlock);
}
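// End-to-end sketch of what the thread-safe path above emits for a static
// local ('Widget' is a hypothetical type, used purely for illustration):
//
//   static Widget w;       // source
//   // lowers to roughly:
//   if (load-acquire(guard.first_byte) == 0) {
//     if (__cxa_guard_acquire(&guard)) {
//       Widget::Widget(&w);                                // may throw
//       __cxa_atexit(&Widget::~Widget, &w, &__dso_handle); // queue dtor
//       __cxa_guard_release(&guard);
//     } // __cxa_guard_abort(&guard) runs on the exceptional edge
//   }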
/// Register a global destructor using __cxa_atexit.
static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
llvm::Constant *dtor,
llvm::Constant *addr,
bool TLS) {
const char *Name = "__cxa_atexit";
if (TLS) {
const llvm::Triple &T = CGF.getTarget().getTriple();
Name = T.isOSDarwin() ? "_tlv_atexit" : "__cxa_thread_atexit";
}
// We're assuming that the destructor function is something we can
// reasonably call with the default CC. Go ahead and cast it to the
// right prototype.
llvm::Type *dtorTy =
llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo();
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy };
llvm::FunctionType *atexitTy =
llvm::FunctionType::get(CGF.IntTy, paramTys, false);
// Fetch the actual function.
llvm::Constant *atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name);
if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit))
fn->setDoesNotThrow();
// Create a variable that binds the atexit to this shared object.
llvm::Constant *handle =
CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy),
handle
};
CGF.EmitNounwindRuntimeCall(atexit, args);
}
/// Register a global destructor as best as we know how.
void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF,
const VarDecl &D,
llvm::Constant *dtor,
llvm::Constant *addr) {
// Use __cxa_atexit if available.
if (CGM.getCodeGenOpts().CXAAtExit)
return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr, D.getTLSKind());
if (D.getTLSKind())
CGM.ErrorUnsupported(&D, "non-trivial TLS destruction");
// In Apple kexts, we want to add a global destructor entry.
// FIXME: shouldn't this be guarded by some variable?
if (CGM.getLangOpts().AppleKext) {
// Generate a global destructor entry.
return CGM.AddCXXDtorEntry(dtor, addr);
}
CGF.registerGlobalDtorWithAtExit(D, dtor, addr);
}
static bool isThreadWrapperReplaceable(const VarDecl *VD,
CodeGen::CodeGenModule &CGM) {
assert(!VD->isStaticLocal() && "static local VarDecls don't need wrappers!");
// Darwin prefers references to thread local variables to go through
// the thread wrapper instead of directly referencing the backing variable.
return VD->getTLSKind() == VarDecl::TLS_Dynamic &&
CGM.getTarget().getTriple().isOSDarwin();
}
/// Get the appropriate linkage for the wrapper function. This is essentially
/// the weak form of the variable's linkage; every translation unit which needs
/// the wrapper emits a copy, and we want the linker to merge them.
static llvm::GlobalValue::LinkageTypes
getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) {
llvm::GlobalValue::LinkageTypes VarLinkage =
CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false);
// For internal linkage variables, we don't need an external or weak wrapper.
if (llvm::GlobalValue::isLocalLinkage(VarLinkage))
return VarLinkage;
// If the thread wrapper is replaceable, give it appropriate linkage.
if (isThreadWrapperReplaceable(VD, CGM))
if (!llvm::GlobalVariable::isLinkOnceLinkage(VarLinkage) &&
!llvm::GlobalVariable::isWeakODRLinkage(VarLinkage))
return VarLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
llvm::Function *
ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Value *Val) {
// Mangle the name for the thread_local wrapper function.
SmallString<256> WrapperName;
{
llvm::raw_svector_ostream Out(WrapperName);
getMangleContext().mangleItaniumThreadLocalWrapper(VD, Out);
}
// FIXME: If VD is a definition, we should regenerate the function attributes
// before returning.
if (llvm::Value *V = CGM.getModule().getNamedValue(WrapperName))
return cast<llvm::Function>(V);
QualType RetQT = VD->getType();
if (RetQT->isReferenceType())
RetQT = RetQT.getNonReferenceType();
const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
getContext().getPointerType(RetQT), FunctionArgList());
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Wrapper =
llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM),
WrapperName.str(), &CGM.getModule());
CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper);
if (VD->hasDefinition())
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
// Always resolve references to the wrapper at link time.
if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) &&
!llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) &&
!llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage())))
Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
if (isThreadWrapperReplaceable(VD, CGM)) {
Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
}
return Wrapper;
}
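// Example: for a hypothetical 'thread_local int x;' with external linkage,
// uses of 'x' from other translation units are lowered to calls to this
// wrapper (mangled '_ZTW1x'), which runs any dynamic initialization on
// first use and returns the address of x.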
void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) {
llvm::Function *InitFunc = nullptr;
// Separate initializers into those with ordered (or partially-ordered)
// initialization and those with unordered initialization.
llvm::SmallVector<llvm::Function *, 8> OrderedInits;
llvm::SmallDenseMap<const VarDecl *, llvm::Function *> UnorderedInits;
for (unsigned I = 0; I != CXXThreadLocalInits.size(); ++I) {
if (isTemplateInstantiation(
CXXThreadLocalInitVars[I]->getTemplateSpecializationKind()))
UnorderedInits[CXXThreadLocalInitVars[I]->getCanonicalDecl()] =
CXXThreadLocalInits[I];
else
OrderedInits.push_back(CXXThreadLocalInits[I]);
}
if (!OrderedInits.empty()) {
// Generate a guarded initialization function.
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
InitFunc = CGM.CreateGlobalInitOrDestructFunction(FTy, "__tls_init", FI,
SourceLocation(),
/*TLS=*/true);
llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
llvm::GlobalVariable::InternalLinkage,
llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard");
Guard->setThreadLocal(true);
CharUnits GuardAlign = CharUnits::One();
Guard->setAlignment(GuardAlign.getQuantity());
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits,
Address(Guard, GuardAlign));
// On Darwin platforms, use CXX_FAST_TLS calling convention.
if (CGM.getTarget().getTriple().isOSDarwin()) {
InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
InitFunc->addFnAttr(llvm::Attribute::NoUnwind);
}
}
// Emit thread wrappers.
for (const VarDecl *VD : CXXThreadLocals) {
llvm::GlobalVariable *Var =
cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD)));
llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var);
// Some targets require that all accesses to thread local variables go through
// the thread wrapper. This means that we cannot attempt to create a thread
// wrapper or a thread helper.
if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) {
Wrapper->setLinkage(llvm::Function::ExternalLinkage);
continue;
}
// Mangle the name for the thread_local initialization function.
SmallString<256> InitFnName;
{
llvm::raw_svector_ostream Out(InitFnName);
getMangleContext().mangleItaniumThreadLocalInit(VD, Out);
}
// If we have a definition for the variable, emit the initialization
// function as an alias to the global Init function (if any). Otherwise,
// produce a declaration of the initialization function.
llvm::GlobalValue *Init = nullptr;
bool InitIsInitFunc = false;
if (VD->hasDefinition()) {
InitIsInitFunc = true;
llvm::Function *InitFuncToUse = InitFunc;
if (isTemplateInstantiation(VD->getTemplateSpecializationKind()))
InitFuncToUse = UnorderedInits.lookup(VD->getCanonicalDecl());
if (InitFuncToUse)
Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(),
InitFuncToUse);
} else {
// Emit a weak global function referring to the initialization function.
// This function will not exist if the TU defining the thread_local
// variable in question does not need any dynamic initialization for
// its thread_local variables.
llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false);
Init = llvm::Function::Create(FnTy,
llvm::GlobalVariable::ExternalWeakLinkage,
InitFnName.str(), &CGM.getModule());
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
}
if (Init)
Init->setVisibility(Var->getVisibility());
llvm::LLVMContext &Context = CGM.getModule().getContext();
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
CGBuilderTy Builder(CGM, Entry);
if (InitIsInitFunc) {
if (Init) {
llvm::CallInst *CallVal = Builder.CreateCall(Init);
if (isThreadWrapperReplaceable(VD, CGM))
CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
}
} else {
// We don't know whether we have an init function; call it if it exists.
llvm::Value *Have = Builder.CreateIsNotNull(Init);
llvm::BasicBlock *InitBB = llvm::BasicBlock::Create(Context, "", Wrapper);
llvm::BasicBlock *ExitBB = llvm::BasicBlock::Create(Context, "", Wrapper);
Builder.CreateCondBr(Have, InitBB, ExitBB);
Builder.SetInsertPoint(InitBB);
Builder.CreateCall(Init);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(ExitBB);
}
// For a reference, the result of the wrapper function is a pointer to
// the referenced object.
llvm::Value *Val = Var;
if (VD->getType()->isReferenceType()) {
CharUnits Align = CGM.getContext().getDeclAlign(VD);
Val = Builder.CreateAlignedLoad(Val, Align);
}
if (Val->getType() != Wrapper->getReturnType())
Val = Builder.CreatePointerBitCastOrAddrSpaceCast(
Val, Wrapper->getReturnType(), "");
Builder.CreateRet(Val);
}
}
LValue ItaniumCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD,
QualType LValType) {
llvm::Value *Val = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Val);
llvm::CallInst *CallVal = CGF.Builder.CreateCall(Wrapper);
CallVal->setCallingConv(Wrapper->getCallingConv());
LValue LV;
if (VD->getType()->isReferenceType())
LV = CGF.MakeNaturalAlignAddrLValue(CallVal, LValType);
else
LV = CGF.MakeAddrLValue(CallVal, LValType,
CGF.getContext().getDeclAlign(VD));
// FIXME: need setObjCGCLValueClass?
return LV;
}
/// Return whether the given global decl needs a VTT parameter, which it does
/// if it's a base constructor or destructor with virtual bases.
bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
// If we don't have any virtual bases, just return early.
if (!MD->getParent()->getNumVBases())
return false;
// Check if we have a base constructor.
if (isa<CXXConstructorDecl>(MD) && GD.getCtorType() == Ctor_Base)
return true;
// Check if we have a base destructor.
if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
return true;
return false;
}
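// Example with hypothetical types: for
//   struct A { }; struct D : virtual A { D(); ~D(); };
// the base-object constructor (C2) and base-object destructor (D2) need a
// VTT parameter because D has a virtual base, while the complete-object
// variants (C1/D1) and the deleting destructor (D0) do not.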
namespace {
class ItaniumRTTIBuilder {
CodeGenModule &CGM; // Per-module state.
llvm::LLVMContext &VMContext;
const ItaniumCXXABI &CXXABI; // Per-module state.
/// Fields - The fields of the RTTI descriptor currently being built.
SmallVector<llvm::Constant *, 16> Fields;
/// GetAddrOfTypeName - Returns the mangled type name of the given type.
llvm::GlobalVariable *
GetAddrOfTypeName(QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage);
/// GetAddrOfExternalRTTIDescriptor - Returns the constant for the RTTI
/// descriptor of the given type.
llvm::Constant *GetAddrOfExternalRTTIDescriptor(QualType Ty);
/// BuildVTablePointer - Build the vtable pointer for the given type.
void BuildVTablePointer(const Type *Ty);
/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single
/// inheritance, according to the Itanium C++ ABI, 2.9.5p6b.
void BuildSIClassTypeInfo(const CXXRecordDecl *RD);
/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for
/// classes with bases that do not satisfy the abi::__si_class_type_info
/// constraints, according to the Itanium C++ ABI, 2.9.5p5c.
void BuildVMIClassTypeInfo(const CXXRecordDecl *RD);
/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, used
/// for pointer types.
void BuildPointerTypeInfo(QualType PointeeTy);
/// BuildObjCObjectTypeInfo - Build the appropriate kind of
/// type_info for an object type.
void BuildObjCObjectTypeInfo(const ObjCObjectType *Ty);
/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info
/// struct, used for member pointer types.
void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty);
public:
ItaniumRTTIBuilder(const ItaniumCXXABI &ABI)
: CGM(ABI.CGM), VMContext(CGM.getModule().getContext()), CXXABI(ABI) {}
// Pointer type info flags.
enum {
/// PTI_Const - Type has const qualifier.
PTI_Const = 0x1,
/// PTI_Volatile - Type has volatile qualifier.
PTI_Volatile = 0x2,
/// PTI_Restrict - Type has restrict qualifier.
PTI_Restrict = 0x4,
/// PTI_Incomplete - Type is incomplete.
PTI_Incomplete = 0x8,
/// PTI_ContainingClassIncomplete - Containing class is incomplete.
/// (in pointer to member).
PTI_ContainingClassIncomplete = 0x10,
/// PTI_TransactionSafe - Pointee is transaction_safe function (C++ TM TS).
//PTI_TransactionSafe = 0x20,
/// PTI_Noexcept - Pointee is noexcept function (C++1z).
PTI_Noexcept = 0x40,
};
// VMI type info flags.
enum {
/// VMI_NonDiamondRepeat - Class has non-diamond repeated inheritance.
VMI_NonDiamondRepeat = 0x1,
/// VMI_DiamondShaped - Class is diamond shaped.
VMI_DiamondShaped = 0x2
};
// Base class type info flags.
enum {
/// BCTI_Virtual - Base class is virtual.
BCTI_Virtual = 0x1,
/// BCTI_Public - Base class is public.
BCTI_Public = 0x2
};
/// BuildTypeInfo - Build the RTTI type info struct for the given type.
///
/// \param Force - true to force the creation of this RTTI value
/// \param DLLExport - true to mark the RTTI value as DLLExport
llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false,
bool DLLExport = false);
};
}
llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName(
QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage) {
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(Ty, Out);
// We know that the mangled name of the type starts at index 4 of the
// mangled name of the typename, so we can just index into it in order to
// get the mangled name of the type.
llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext,
Name.substr(4));
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage);
GV->setInitializer(Init);
return GV;
}
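// Example: for a hypothetical 'struct A', mangleCXXRTTIName produces
// "_ZTS1A"; Name.substr(4) strips the 4-character "_ZTS" prefix, so the
// string stored in the variable is exactly the mangled type name "1A".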
llvm::Constant *
ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
// Mangle the RTTI name.
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out);
// Look for an existing global.
llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name);
if (!GV) {
// Create a new global variable.
// Note for the future: if we ever want to do deferred emission of
// RTTI, check whether emitting vtables opportunistically needs any
// adjustment.
GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*Constant=*/true,
llvm::GlobalValue::ExternalLinkage, nullptr,
Name);
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (RD->hasAttr<DLLImportAttr>())
GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
}
}
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
/// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type
/// info for that type is defined in the standard library.
static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
// Itanium C++ ABI 2.9.2:
// Basic type information (e.g. for "int", "bool", etc.) will be kept in
// the run-time support library. Specifically, the run-time support
// library should contain type_info objects for the types X, X* and
// X const*, for every X in: void, std::nullptr_t, bool, wchar_t, char,
// unsigned char, signed char, short, unsigned short, int, unsigned int,
// long, unsigned long, long long, unsigned long long, float, double,
// long double, char16_t, char32_t, and the IEEE 754r decimal and
// half-precision floating point types.
//
// GCC also emits RTTI for __int128.
// FIXME: We do not emit RTTI information for decimal types here.
// Types added here must also be added to EmitFundamentalRTTIDescriptors.
switch (Ty->getKind()) {
case BuiltinType::Void:
case BuiltinType::NullPtr:
case BuiltinType::Bool:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Char_U:
case BuiltinType::Char_S:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Short:
case BuiltinType::UShort:
case BuiltinType::Int:
case BuiltinType::UInt:
case BuiltinType::Long:
case BuiltinType::ULong:
case BuiltinType::LongLong:
case BuiltinType::ULongLong:
case BuiltinType::Half:
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
case BuiltinType::Float128:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
case BuiltinType::UInt128:
return true;
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
return false;
case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
llvm_unreachable("asking for RRTI for a placeholder type!");
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
llvm_unreachable("FIXME: Objective-C types are unsupported!");
}
llvm_unreachable("Invalid BuiltinType Kind!");
}
static bool TypeInfoIsInStandardLibrary(const PointerType *PointerTy) {
QualType PointeeTy = PointerTy->getPointeeType();
const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(PointeeTy);
if (!BuiltinTy)
return false;
// Check the qualifiers.
Qualifiers Quals = PointeeTy.getQualifiers();
Quals.removeConst();
if (!Quals.empty())
return false;
return TypeInfoIsInStandardLibrary(BuiltinTy);
}
/// IsStandardLibraryRTTIDescriptor - Returns whether the type
/// information for the given type exists in the standard library.
static bool IsStandardLibraryRTTIDescriptor(QualType Ty) {
// Type info for builtin types is defined in the standard library.
if (const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(Ty))
return TypeInfoIsInStandardLibrary(BuiltinTy);
// Type info for some pointer types to builtin types is defined in the
// standard library.
if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
return TypeInfoIsInStandardLibrary(PointerTy);
return false;
}
/// ShouldUseExternalRTTIDescriptor - Returns whether the type information for
/// the given type exists somewhere else, and that we should not emit the type
/// information in this translation unit. Assumes that it is not a
/// standard-library type.
static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
QualType Ty) {
ASTContext &Context = CGM.getContext();
// If RTTI is disabled, assume it might be disabled in the
// translation unit that defines any potential key function, too.
if (!Context.getLangOpts().RTTI) return false;
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!RD->hasDefinition())
return false;
if (!RD->isDynamicClass())
return false;
// FIXME: this may need to be reconsidered if the key function
// changes.
// N.B. We must always emit the RTTI data ourselves if there exists a key
// function.
bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
if (CGM.getVTables().isVTableExternal(RD))
return !IsDLLImport || CGM.getTriple().isWindowsItaniumEnvironment();
if (IsDLLImport)
return true;
}
return false;
}
/// IsIncompleteClassType - Returns whether the given record type is incomplete.
static bool IsIncompleteClassType(const RecordType *RecordTy) {
return !RecordTy->getDecl()->isCompleteDefinition();
}
/// ContainsIncompleteClassType - Returns whether the given type contains an
/// incomplete class type. This is true if
///
/// * The given type is an incomplete class type.
/// * The given type is a pointer type whose pointee type contains an
/// incomplete class type.
/// * The given type is a member pointer type whose class is an incomplete
/// class type.
/// * The given type is a member pointer type whose pointee type contains an
/// incomplete class type.
static bool ContainsIncompleteClassType(QualType Ty) {
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
if (IsIncompleteClassType(RecordTy))
return true;
}
if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
return ContainsIncompleteClassType(PointerTy->getPointeeType());
if (const MemberPointerType *MemberPointerTy =
dyn_cast<MemberPointerType>(Ty)) {
// Check if the class type is incomplete.
const RecordType *ClassType = cast<RecordType>(MemberPointerTy->getClass());
if (IsIncompleteClassType(ClassType))
return true;
return ContainsIncompleteClassType(MemberPointerTy->getPointeeType());
}
return false;
}
// CanUseSingleInheritance - Return whether the given record decl has a "single,
// public, non-virtual base at offset zero (i.e. the derived class is dynamic
// iff the base is)", according to the Itanium C++ ABI, 2.9.5p6b.
static bool CanUseSingleInheritance(const CXXRecordDecl *RD) {
// Check the number of bases.
if (RD->getNumBases() != 1)
return false;
// Get the base.
CXXRecordDecl::base_class_const_iterator Base = RD->bases_begin();
// Check that the base is not virtual.
if (Base->isVirtual())
return false;
// Check that the base is public.
if (Base->getAccessSpecifier() != AS_public)
return false;
// Check that the class is dynamic iff the base is.
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
if (!BaseDecl->isEmpty() &&
BaseDecl->isDynamicClass() != RD->isDynamicClass())
return false;
return true;
}
void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
// abi::__class_type_info.
static const char * const ClassTypeInfo =
"_ZTVN10__cxxabiv117__class_type_infoE";
// abi::__si_class_type_info.
static const char * const SIClassTypeInfo =
"_ZTVN10__cxxabiv120__si_class_type_infoE";
// abi::__vmi_class_type_info.
static const char * const VMIClassTypeInfo =
"_ZTVN10__cxxabiv121__vmi_class_type_infoE";
const char *VTableName = nullptr;
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.def"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::LValueReference:
case Type::RValueReference:
llvm_unreachable("References shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe types shouldn't get here");
case Type::Builtin:
// GCC treats vector and complex types as fundamental types.
case Type::Vector:
case Type::ExtVector:
case Type::Complex:
case Type::Atomic:
// FIXME: GCC treats block pointers as fundamental types?!
case Type::BlockPointer:
// abi::__fundamental_type_info.
VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE";
break;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
// abi::__array_type_info.
VTableName = "_ZTVN10__cxxabiv117__array_type_infoE";
break;
case Type::FunctionNoProto:
case Type::FunctionProto:
// abi::__function_type_info.
VTableName = "_ZTVN10__cxxabiv120__function_type_infoE";
break;
case Type::Enum:
// abi::__enum_type_info.
VTableName = "_ZTVN10__cxxabiv116__enum_type_infoE";
break;
case Type::Record: {
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl());
if (!RD->hasDefinition() || !RD->getNumBases()) {
VTableName = ClassTypeInfo;
} else if (CanUseSingleInheritance(RD)) {
VTableName = SIClassTypeInfo;
} else {
VTableName = VMIClassTypeInfo;
}
break;
}
case Type::ObjCObject:
// Ignore protocol qualifiers.
Ty = cast<ObjCObjectType>(Ty)->getBaseType().getTypePtr();
// Handle id and Class.
if (isa<BuiltinType>(Ty)) {
VTableName = ClassTypeInfo;
break;
}
assert(isa<ObjCInterfaceType>(Ty));
// Fall through.
case Type::ObjCInterface:
if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) {
VTableName = SIClassTypeInfo;
} else {
VTableName = ClassTypeInfo;
}
break;
case Type::ObjCObjectPointer:
case Type::Pointer:
// abi::__pointer_type_info.
VTableName = "_ZTVN10__cxxabiv119__pointer_type_infoE";
break;
case Type::MemberPointer:
// abi::__pointer_to_member_type_info.
VTableName = "_ZTVN10__cxxabiv129__pointer_to_member_type_infoE";
break;
}
llvm::Constant *VTable =
CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy);
llvm::Type *PtrDiffTy =
CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
// The vtable address point is at index 2, past the offset-to-top and RTTI
// slots.
llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
VTable =
llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two);
VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
Fields.push_back(VTable);
}
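// Example: for a hypothetical class with no bases, the emitted type_info
// object begins with a pointer to _ZTVN10__cxxabiv117__class_type_infoE
// advanced by two pointer-sized slots, i.e. the vtable address point past
// the offset-to-top and RTTI fields, exactly as computed above.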
/// \brief Return the linkage that the type info and type info name constants
/// should have for the given type.
static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
QualType Ty) {
// Itanium C++ ABI 2.9.5p7:
// In addition, it and all of the intermediate abi::__pointer_type_info
// structs in the chain down to the abi::__class_type_info for the
// incomplete class type must be prevented from resolving to the
// corresponding type_info structs for the complete class type, possibly
// by making them local static objects. Finally, a dummy class RTTI is
// generated for the incomplete type that will not resolve to the final
// complete class RTTI (because the latter need not exist), possibly by
// making it a local static object.
if (ContainsIncompleteClassType(Ty))
return llvm::GlobalValue::InternalLinkage;
switch (Ty->getLinkage()) {
case NoLinkage:
case InternalLinkage:
case UniqueExternalLinkage:
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
case ModuleInternalLinkage:
case ModuleLinkage:
case ExternalLinkage:
// If RTTI is not enabled, this type info struct is only going to be used
// for exception handling. Give it linkonce_odr linkage.
if (!CGM.getLangOpts().RTTI)
return llvm::GlobalValue::LinkOnceODRLinkage;
if (const RecordType *Record = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl());
if (RD->hasAttr<WeakAttr>())
return llvm::GlobalValue::WeakODRLinkage;
if (CGM.getTriple().isWindowsItaniumEnvironment())
if (RD->hasAttr<DLLImportAttr>() &&
ShouldUseExternalRTTIDescriptor(CGM, Ty))
return llvm::GlobalValue::ExternalLinkage;
if (RD->isDynamicClass()) {
llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD);
// MinGW won't export the RTTI information when there is a key function.
// Make sure we emit our own copy instead of attempting to dllimport it.
if (RD->hasAttr<DLLImportAttr>() &&
llvm::GlobalValue::isAvailableExternallyLinkage(LT))
LT = llvm::GlobalValue::LinkOnceODRLinkage;
return LT;
}
}
return llvm::GlobalValue::LinkOnceODRLinkage;
}
llvm_unreachable("Invalid linkage!");
}
llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
bool DLLExport) {
// We want to operate on the canonical type.
Ty = Ty.getCanonicalType();
// Check if we've already emitted an RTTI descriptor for this type.
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out);
llvm::GlobalVariable *OldGV = CGM.getModule().getNamedGlobal(Name);
if (OldGV && !OldGV->isDeclaration()) {
assert(!OldGV->hasAvailableExternallyLinkage() &&
"available_externally typeinfos not yet implemented");
return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy);
}
// Check if there is already an external RTTI descriptor for this type.
bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty);
if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty)))
return GetAddrOfExternalRTTIDescriptor(Ty);
// Emit standard-library RTTI descriptors with external linkage.
llvm::GlobalVariable::LinkageTypes Linkage;
if (IsStdLib)
Linkage = llvm::GlobalValue::ExternalLinkage;
else
Linkage = getTypeInfoLinkage(CGM, Ty);
// Add the vtable pointer.
BuildVTablePointer(cast<Type>(Ty));
// And the name.
llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage);
llvm::Constant *TypeNameField;
// If we're supposed to demote the visibility, be sure to set a flag
// to use a string comparison for type_info comparisons.
ItaniumCXXABI::RTTIUniquenessKind RTTIUniqueness =
CXXABI.classifyRTTIUniqueness(Ty, Linkage);
if (RTTIUniqueness != ItaniumCXXABI::RUK_Unique) {
// The flag is the sign bit, which on ARM64 is defined to be clear
// for global pointers. This is very ARM64-specific.
TypeNameField = llvm::ConstantExpr::getPtrToInt(TypeName, CGM.Int64Ty);
llvm::Constant *flag =
llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63);
TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag);
TypeNameField =
llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy);
} else {
TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy);
}
Fields.push_back(TypeNameField);
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.def"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
// GCC treats vector types as fundamental types.
case Type::Builtin:
case Type::Vector:
case Type::ExtVector:
case Type::Complex:
case Type::BlockPointer:
// Itanium C++ ABI 2.9.5p4:
// abi::__fundamental_type_info adds no data members to std::type_info.
break;
case Type::LValueReference:
case Type::RValueReference:
llvm_unreachable("References shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe type shouldn't get here");
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
// Itanium C++ ABI 2.9.5p5:
// abi::__array_type_info adds no data members to std::type_info.
break;
case Type::FunctionNoProto:
case Type::FunctionProto:
// Itanium C++ ABI 2.9.5p5:
// abi::__function_type_info adds no data members to std::type_info.
break;
case Type::Enum:
// Itanium C++ ABI 2.9.5p5:
// abi::__enum_type_info adds no data members to std::type_info.
break;
case Type::Record: {
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl());
if (!RD->hasDefinition() || !RD->getNumBases()) {
// We don't need to emit any fields.
break;
}
if (CanUseSingleInheritance(RD))
BuildSIClassTypeInfo(RD);
else
BuildVMIClassTypeInfo(RD);
break;
}
case Type::ObjCObject:
case Type::ObjCInterface:
BuildObjCObjectTypeInfo(cast<ObjCObjectType>(Ty));
break;
case Type::ObjCObjectPointer:
BuildPointerTypeInfo(cast<ObjCObjectPointerType>(Ty)->getPointeeType());
break;
case Type::Pointer:
BuildPointerTypeInfo(cast<PointerType>(Ty)->getPointeeType());
break;
case Type::MemberPointer:
BuildPointerToMemberTypeInfo(cast<MemberPointerType>(Ty));
break;
case Type::Atomic:
// No fields, at least for the moment.
break;
}
llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields);
llvm::Module &M = CGM.getModule();
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(M, Init->getType(),
/*Constant=*/true, Linkage, Init, Name);
// If there's already an old global variable, replace it with the new one.
if (OldGV) {
GV->takeName(OldGV);
llvm::Constant *NewPtr =
llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
OldGV->replaceAllUsesWith(NewPtr);
OldGV->eraseFromParent();
}
if (CGM.supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(M.getOrInsertComdat(GV->getName()));
// The Itanium ABI specifies that type_info objects must be globally
// unique, with one exception: if the type is an incomplete class
// type or a (possibly indirect) pointer to one. That exception
// affects the general case of comparing type_info objects produced
// by the typeid operator, which is why the comparison operators on
// std::type_info generally use the type_info name pointers instead
// of the object addresses. However, the language's built-in uses
// of RTTI generally require class types to be complete, even when
// manipulating pointers to those class types. This allows the
// implementation of dynamic_cast to rely on address equality tests,
// which is much faster.
// All of this is to say that it's important that both the type_info
// object and the type_info name be uniqued when weakly emitted.
// Give the type_info object and name the formal visibility of the
// type itself.
llvm::GlobalValue::VisibilityTypes llvmVisibility;
if (llvm::GlobalValue::isLocalLinkage(Linkage))
// If the linkage is local, only default visibility makes sense.
llvmVisibility = llvm::GlobalValue::DefaultVisibility;
else if (RTTIUniqueness == ItaniumCXXABI::RUK_NonUniqueHidden)
llvmVisibility = llvm::GlobalValue::HiddenVisibility;
else
llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
TypeName->setVisibility(llvmVisibility);
GV->setVisibility(llvmVisibility);
if (CGM.getTriple().isWindowsItaniumEnvironment()) {
auto RD = Ty->getAsCXXRecordDecl();
if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
} else if (RD && RD->hasAttr<DLLImportAttr>() &&
ShouldUseExternalRTTIDescriptor(CGM, Ty)) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
// Because the typename and the typeinfo are DLL import, convert them to
// declarations rather than definitions. The initializers still need to
// be constructed to calculate the type for the declarations.
TypeName->setInitializer(nullptr);
GV->setInitializer(nullptr);
}
}
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
/// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info
/// for the given Objective-C object type.
void ItaniumRTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) {
// Drop qualifiers.
const Type *T = OT->getBaseType().getTypePtr();
assert(isa<BuiltinType>(T) || isa<ObjCInterfaceType>(T));
// The builtin types are abi::__class_type_infos and don't require
// extra fields.
if (isa<BuiltinType>(T)) return;
ObjCInterfaceDecl *Class = cast<ObjCInterfaceType>(T)->getDecl();
ObjCInterfaceDecl *Super = Class->getSuperClass();
// Root classes are also __class_type_info.
if (!Super) return;
QualType SuperTy = CGM.getContext().getObjCInterfaceType(Super);
// Everything else is single inheritance.
llvm::Constant *BaseTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(SuperTy);
Fields.push_back(BaseTypeInfo);
}
/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single
/// inheritance, according to the Itanium C++ ABI, 2.9.5p6b.
void ItaniumRTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) {
// Itanium C++ ABI 2.9.5p6b:
// It adds to abi::__class_type_info a single member pointing to the
// type_info structure for the base type,
llvm::Constant *BaseTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(RD->bases_begin()->getType());
Fields.push_back(BaseTypeInfo);
}
namespace {
/// SeenBases - Contains virtual and non-virtual bases seen when traversing
/// a class hierarchy.
struct SeenBases {
llvm::SmallPtrSet<const CXXRecordDecl *, 16> NonVirtualBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 16> VirtualBases;
};
}
/// ComputeVMIClassTypeInfoFlags - Compute the value of the flags member in
/// abi::__vmi_class_type_info.
///
static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base,
SeenBases &Bases) {
unsigned Flags = 0;
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
if (Base->isVirtual()) {
// Mark the virtual base as seen.
if (!Bases.VirtualBases.insert(BaseDecl).second) {
// If this virtual base has been seen before, then the class is diamond
// shaped.
Flags |= ItaniumRTTIBuilder::VMI_DiamondShaped;
} else {
if (Bases.NonVirtualBases.count(BaseDecl))
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
}
} else {
// Mark the non-virtual base as seen.
if (!Bases.NonVirtualBases.insert(BaseDecl).second) {
// If this non-virtual base has been seen before, then the class has non-
// diamond shaped repeated inheritance.
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
} else {
if (Bases.VirtualBases.count(BaseDecl))
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
}
}
// Walk all bases.
for (const auto &I : BaseDecl->bases())
Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
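// A minimal sketch of the computation (hypothetical hierarchy, for
// illustration only):
//   struct A { };
//   struct B : virtual A { };
//   struct C : virtual A { };
//   struct D : B, C { };   // A is reachable twice, both times virtually
// Walking D's bases inserts A into VirtualBases via B; the second insert via
// C fails, so VMI_DiamondShaped is set. Had D also derived from A
// non-virtually, the count() checks would additionally set
// VMI_NonDiamondRepeat.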
static unsigned ComputeVMIClassTypeInfoFlags(const CXXRecordDecl *RD) {
unsigned Flags = 0;
SeenBases Bases;
// Walk all bases.
for (const auto &I : RD->bases())
Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for
/// classes with bases that do not satisfy the abi::__si_class_type_info
/// constraints, according to the Itanium C++ ABI, 2.9.5p5c.
void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
// Itanium C++ ABI 2.9.5p6c:
// __flags is a word with flags describing details about the class
// structure, which may be referenced by using the __flags_masks
// enumeration. These flags refer to both direct and indirect bases.
unsigned Flags = ComputeVMIClassTypeInfoFlags(RD);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p6c:
// __base_count is a word with the number of direct proper base class
// descriptions that follow.
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, RD->getNumBases()));
if (!RD->getNumBases())
return;
// Now add the base class descriptions.
// Itanium C++ ABI 2.9.5p6c:
// __base_info[] is an array of base class descriptions -- one for every
// direct proper base. Each description is of the type:
//
// struct abi::__base_class_type_info {
// public:
// const __class_type_info *__base_type;
// long __offset_flags;
//
// enum __offset_flags_masks {
// __virtual_mask = 0x1,
// __public_mask = 0x2,
// __offset_shift = 8
// };
// };
// If we're in mingw and 'long' isn't wide enough for a pointer, use 'long
// long' instead of 'long' for __offset_flags. libstdc++abi uses long long on
// LLP64 platforms.
// FIXME: Consider updating libc++abi to match, and extend this logic to all
// LLP64 platforms.
QualType OffsetFlagsTy = CGM.getContext().LongTy;
const TargetInfo &TI = CGM.getContext().getTargetInfo();
if (TI.getTriple().isOSCygMing() && TI.getPointerWidth(0) > TI.getLongWidth())
OffsetFlagsTy = CGM.getContext().LongLongTy;
llvm::Type *OffsetFlagsLTy =
CGM.getTypes().ConvertType(OffsetFlagsTy);
for (const auto &Base : RD->bases()) {
// The __base_type member points to the RTTI for the base type.
Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType()));
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
int64_t OffsetFlags = 0;
// All but the lower 8 bits of __offset_flags are a signed offset.
// For a non-virtual base, this is the offset in the object of the base
// subobject. For a virtual base, this is the offset in the virtual table of
// the virtual base offset for the virtual base referenced (negative).
CharUnits Offset;
if (Base.isVirtual())
Offset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(RD, BaseDecl);
else {
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
Offset = Layout.getBaseClassOffset(BaseDecl);
}
OffsetFlags = uint64_t(Offset.getQuantity()) << 8;
// The low-order byte of __offset_flags contains flags, as given by the
// masks from the enumeration __offset_flags_masks.
if (Base.isVirtual())
OffsetFlags |= BCTI_Virtual;
if (Base.getAccessSpecifier() == AS_public)
OffsetFlags |= BCTI_Public;
Fields.push_back(llvm::ConstantInt::get(OffsetFlagsLTy, OffsetFlags));
}
}
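// Worked example (illustrative offsets, not taken from the source): for
//   struct A { virtual ~A(); };
//   struct E : public A { };
// the single base description would carry
//   __offset_flags = (0 << 8) | __public_mask;                    // == 0x2
// while a public virtual base whose vbase-offset slot lives at -24 in the
// vtable would instead carry
//   __offset_flags = (-24 << 8) | __virtual_mask | __public_mask;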
/// Compute the flags for a __pbase_type_info, and remove the corresponding
/// pieces from \p Type.
static unsigned extractPBaseFlags(ASTContext &Ctx, QualType &Type) {
unsigned Flags = 0;
if (Type.isConstQualified())
Flags |= ItaniumRTTIBuilder::PTI_Const;
if (Type.isVolatileQualified())
Flags |= ItaniumRTTIBuilder::PTI_Volatile;
if (Type.isRestrictQualified())
Flags |= ItaniumRTTIBuilder::PTI_Restrict;
Type = Type.getUnqualifiedType();
// Itanium C++ ABI 2.9.5p7:
// When the abi::__pbase_type_info is for a direct or indirect pointer to an
// incomplete class type, the incomplete target type flag is set.
if (ContainsIncompleteClassType(Type))
Flags |= ItaniumRTTIBuilder::PTI_Incomplete;
if (auto *Proto = Type->getAs<FunctionProtoType>()) {
if (Proto->isNothrow(Ctx)) {
Flags |= ItaniumRTTIBuilder::PTI_Noexcept;
Type = Ctx.getFunctionType(
Proto->getReturnType(), Proto->getParamTypes(),
Proto->getExtProtoInfo().withExceptionSpec(EST_None));
}
}
return Flags;
}
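// For example (sketch, not from the source): given a pointee type
//   const volatile int
// this returns PTI_Const | PTI_Volatile and strips both qualifiers, so the
// __pointee field emitted below references the type_info for plain 'int'.
// Likewise, for 'void () noexcept' it sets PTI_Noexcept and rewrites the
// prototype without its exception specification.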
/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct,
/// used for pointer types.
void ItaniumRTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) {
// Itanium C++ ABI 2.9.5p7:
// __flags is a flag word describing the cv-qualification and other
// attributes of the type pointed to
unsigned Flags = extractPBaseFlags(CGM.getContext(), PointeeTy);
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p7:
// __pointee is a pointer to the std::type_info derivation for the
// unqualified type being pointed to.
llvm::Constant *PointeeTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(PointeeTy);
Fields.push_back(PointeeTypeInfo);
}
/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info
/// struct, used for member pointer types.
void
ItaniumRTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) {
QualType PointeeTy = Ty->getPointeeType();
// Itanium C++ ABI 2.9.5p7:
// __flags is a flag word describing the cv-qualification and other
// attributes of the type pointed to.
unsigned Flags = extractPBaseFlags(CGM.getContext(), PointeeTy);
const RecordType *ClassType = cast<RecordType>(Ty->getClass());
if (IsIncompleteClassType(ClassType))
Flags |= PTI_ContainingClassIncomplete;
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p7:
// __pointee is a pointer to the std::type_info derivation for the
// unqualified type being pointed to.
llvm::Constant *PointeeTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(PointeeTy);
Fields.push_back(PointeeTypeInfo);
// Itanium C++ ABI 2.9.5p9:
// __context is a pointer to an abi::__class_type_info corresponding to the
// class type containing the member pointed to
// (e.g., the "A" in "int A::*").
Fields.push_back(
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(QualType(ClassType, 0)));
}
llvm::Constant *ItaniumCXXABI::getAddrOfRTTIDescriptor(QualType Ty) {
return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty);
}
void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type,
bool DLLExport) {
QualType PointerType = getContext().getPointerType(Type);
QualType PointerTypeConst = getContext().getPointerType(Type.withConst());
ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, /*Force=*/true, DLLExport);
ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, /*Force=*/true,
DLLExport);
ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, /*Force=*/true,
DLLExport);
}
void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
// Types added here must also be added to TypeInfoIsInStandardLibrary.
QualType FundamentalTypes[] = {
getContext().VoidTy, getContext().NullPtrTy,
getContext().BoolTy, getContext().WCharTy,
getContext().CharTy, getContext().UnsignedCharTy,
getContext().SignedCharTy, getContext().ShortTy,
getContext().UnsignedShortTy, getContext().IntTy,
getContext().UnsignedIntTy, getContext().LongTy,
getContext().UnsignedLongTy, getContext().LongLongTy,
getContext().UnsignedLongLongTy, getContext().Int128Ty,
getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
getContext().LongDoubleTy, getContext().Float128Ty,
getContext().Char16Ty, getContext().Char32Ty
};
for (const QualType &FundamentalType : FundamentalTypes)
EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport);
}
/// What sort of uniqueness rules should we use for the RTTI for the
/// given type?
ItaniumCXXABI::RTTIUniquenessKind ItaniumCXXABI::classifyRTTIUniqueness(
QualType CanTy, llvm::GlobalValue::LinkageTypes Linkage) const {
if (shouldRTTIBeUnique())
return RUK_Unique;
// It's only necessary for linkonce_odr or weak_odr linkage.
if (Linkage != llvm::GlobalValue::LinkOnceODRLinkage &&
Linkage != llvm::GlobalValue::WeakODRLinkage)
return RUK_Unique;
// It's only necessary with default visibility.
if (CanTy->getVisibility() != DefaultVisibility)
return RUK_Unique;
// If we're not required to publish this symbol, hide it.
if (Linkage == llvm::GlobalValue::LinkOnceODRLinkage)
return RUK_NonUniqueHidden;
// If we're required to publish this symbol, as we might be under an
// explicit instantiation, leave it with default visibility but
// enable string-comparisons.
assert(Linkage == llvm::GlobalValue::WeakODRLinkage);
return RUK_NonUniqueVisible;
}
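// Illustrative sketch of what the non-unique kinds imply at run time
// (assumed libc++-style logic, not from this file): because BuildTypeInfo
// sets the sign bit of the name pointer for RUK_NonUnique* descriptors,
// equality can no longer rely on address identity:
//   if ((uintptr_t)lhs.__name & kNonUniqueBit)     // hypothetical mask
//     return strcmp(lhs.name(), rhs.name()) == 0;  // string comparison
//   return &lhs == &rhs;                           // unique case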
// Find out how to codegen the complete destructor and constructor
namespace {
enum class StructorCodegen { Emit, RAUW, Alias, COMDAT };
}
static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
const CXXMethodDecl *MD) {
if (!CGM.getCodeGenOpts().CXXCtorDtorAliases)
return StructorCodegen::Emit;
// The complete and base structors are not equivalent if there are any virtual
// bases, so emit separate functions.
if (MD->getParent()->getNumVBases())
return StructorCodegen::Emit;
GlobalDecl AliasDecl;
if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) {
AliasDecl = GlobalDecl(DD, Dtor_Complete);
} else {
const auto *CD = cast<CXXConstructorDecl>(MD);
AliasDecl = GlobalDecl(CD, Ctor_Complete);
}
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl);
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage))
return StructorCodegen::RAUW;
// FIXME: Should we allow available_externally aliases?
if (!llvm::GlobalAlias::isValidLinkage(Linkage))
return StructorCodegen::RAUW;
if (llvm::GlobalValue::isWeakForLinker(Linkage)) {
// Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
if (CGM.getTarget().getTriple().isOSBinFormatELF() ||
CGM.getTarget().getTriple().isOSBinFormatWasm())
return StructorCodegen::COMDAT;
return StructorCodegen::Emit;
}
return StructorCodegen::Alias;
}
static void emitConstructorDestructorAlias(CodeGenModule &CGM,
GlobalDecl AliasDecl,
GlobalDecl TargetDecl) {
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl);
StringRef MangledName = CGM.getMangledName(AliasDecl);
llvm::GlobalValue *Entry = CGM.GetGlobalValue(MangledName);
if (Entry && !Entry->isDeclaration())
return;
auto *Aliasee = cast<llvm::GlobalValue>(CGM.GetAddrOfGlobal(TargetDecl));
// Create the alias with no name.
auto *Alias = llvm::GlobalAlias::create(Linkage, "", Aliasee);
// Switch any previous uses to the alias.
if (Entry) {
assert(Entry->getType() == Aliasee->getType() &&
"declaration exists with different type");
Alias->takeName(Entry);
Entry->replaceAllUsesWith(Alias);
Entry->eraseFromParent();
} else {
Alias->setName(MangledName);
}
// Finally, set up the alias with its proper name and attributes.
CGM.setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
}
void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
StructorType Type) {
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD);
StructorCodegen CGType = getCodegenToUse(CGM, MD);
if (Type == StructorType::Complete) {
GlobalDecl CompleteDecl;
GlobalDecl BaseDecl;
if (CD) {
CompleteDecl = GlobalDecl(CD, Ctor_Complete);
BaseDecl = GlobalDecl(CD, Ctor_Base);
} else {
CompleteDecl = GlobalDecl(DD, Dtor_Complete);
BaseDecl = GlobalDecl(DD, Dtor_Base);
}
if (CGType == StructorCodegen::Alias || CGType == StructorCodegen::COMDAT) {
emitConstructorDestructorAlias(CGM, CompleteDecl, BaseDecl);
return;
}
if (CGType == StructorCodegen::RAUW) {
StringRef MangledName = CGM.getMangledName(CompleteDecl);
auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl);
CGM.addReplacement(MangledName, Aliasee);
return;
}
}
// The base destructor is equivalent to the base destructor of its
// base class if there is exactly one non-virtual base class with a
// non-trivial destructor, there are no fields with a non-trivial
// destructor, and the body of the destructor is trivial.
if (DD && Type == StructorType::Base && CGType != StructorCodegen::COMDAT &&
!CGM.TryEmitBaseDestructorAsAlias(DD))
return;
llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type);
if (CGType == StructorCodegen::COMDAT) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
if (DD)
getMangleContext().mangleCXXDtorComdat(DD, Out);
else
getMangleContext().mangleCXXCtorComdat(CD, Out);
llvm::Comdat *C = CGM.getModule().getOrInsertComdat(Out.str());
Fn->setComdat(C);
} else {
CGM.maybeSetTrivialComdat(*MD, *Fn);
}
}
static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
// void *__cxa_begin_catch(void*);
llvm::FunctionType *FTy = llvm::FunctionType::get(
CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
}
static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
// void __cxa_end_catch();
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
}
static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
// void *__cxa_get_exception_ptr(void*);
llvm::FunctionType *FTy = llvm::FunctionType::get(
CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
}
namespace {
/// A cleanup to call __cxa_end_catch. In many cases, the caught
/// exception type lets us state definitively that the thrown exception
/// type does not have a destructor. In particular:
/// - Catch-alls tell us nothing, so we have to conservatively
/// assume that the thrown exception might have a destructor.
/// - Catches by reference behave according to their base types.
/// - Catches of non-record types will only trigger for exceptions
/// of non-record types, which never have destructors.
/// - Catches of record types can trigger for arbitrary subclasses
/// of the caught type, so we have to assume the actual thrown
/// exception type might have a throwing destructor, even if the
/// caught type's destructor is trivial or nothrow.
struct CallEndCatch final : EHScopeStack::Cleanup {
CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {}
bool MightThrow;
void Emit(CodeGenFunction &CGF, Flags flags) override {
if (!MightThrow) {
CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM));
return;
}
CGF.EmitRuntimeCallOrInvoke(getEndCatchFn(CGF.CGM));
}
};
}
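// Concretely (illustration, not from the source), the rules above give:
//   catch (int)                -> non-record type, MightThrow = false
//   catch (std::exception &e)  -> record base type, MightThrow = true
//   catch (...)                -> catch-all, conservatively MightThrow = true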
/// Emits a call to __cxa_begin_catch and enters a cleanup to call
/// __cxa_end_catch.
///
/// \param EndMightThrow - true if __cxa_end_catch might throw
static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
llvm::Value *Exn,
bool EndMightThrow) {
llvm::CallInst *call =
CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn);
CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow);
return call;
}
/// A "special initializer" callback for initializing a catch
/// parameter during catch initialization.
static void InitCatchParam(CodeGenFunction &CGF,
const VarDecl &CatchParam,
Address ParamAddr,
SourceLocation Loc) {
// Load the exception from where the landing pad saved it.
llvm::Value *Exn = CGF.getExceptionFromSlot();
CanQualType CatchType =
CGF.CGM.getContext().getCanonicalType(CatchParam.getType());
llvm::Type *LLVMCatchTy = CGF.ConvertTypeForMem(CatchType);
// If we're catching by reference, we can just cast the object
// pointer to the appropriate pointer.
if (isa<ReferenceType>(CatchType)) {
QualType CaughtType = cast<ReferenceType>(CatchType)->getPointeeType();
bool EndCatchMightThrow = CaughtType->isRecordType();
// __cxa_begin_catch returns the adjusted object pointer.
llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, EndCatchMightThrow);
// We have no way to tell the personality function that we're
// catching by reference, so if we're catching a pointer,
// __cxa_begin_catch will actually return that pointer by value.
if (const PointerType *PT = dyn_cast<PointerType>(CaughtType)) {
QualType PointeeType = PT->getPointeeType();
// When catching by reference, generally we should just ignore
// this by-value pointer and use the exception object instead.
if (!PointeeType->isRecordType()) {
// Exn points to the struct _Unwind_Exception header, which
// we have to skip past in order to reach the exception data.
unsigned HeaderSize =
CGF.CGM.getTargetCodeGenInfo().getSizeOfUnwindException();
AdjustedExn = CGF.Builder.CreateConstGEP1_32(Exn, HeaderSize);
// However, if we're catching a pointer-to-record type, that won't
// work, because the personality function might have adjusted
// the pointer. There's actually no way for us to fully satisfy
// the language/ABI contract here: we can't use Exn because it
// might have the wrong adjustment, but we can't use the by-value
// pointer because it's off by a level of abstraction.
//
// The current solution is to dump the adjusted pointer into an
// alloca, which breaks language semantics (because changing the
// pointer doesn't change the exception) but at least works.
// The better solution would be to filter out non-exact matches
// and rethrow them, but this is tricky because the rethrow
// really needs to be catchable by other sites at this landing
// pad. The best solution is to fix the personality function.
} else {
// Pull the pointer for the reference type off.
llvm::Type *PtrTy =
cast<llvm::PointerType>(LLVMCatchTy)->getElementType();
// Create the temporary and write the adjusted pointer into it.
Address ExnPtrTmp =
CGF.CreateTempAlloca(PtrTy, CGF.getPointerAlign(), "exn.byref.tmp");
llvm::Value *Casted = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
CGF.Builder.CreateStore(Casted, ExnPtrTmp);
// Bind the reference to the temporary.
AdjustedExn = ExnPtrTmp.getPointer();
}
}
llvm::Value *ExnCast =
CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.byref");
CGF.Builder.CreateStore(ExnCast, ParamAddr);
return;
}
// Scalars and complexes.
TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType);
if (TEK != TEK_Aggregate) {
llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, false);
// If the catch type is a pointer type, __cxa_begin_catch returns
// the pointer by value.
if (CatchType->hasPointerRepresentation()) {
llvm::Value *CastExn =
CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.casted");
switch (CatchType.getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
CastExn = CGF.EmitARCRetainNonBlock(CastExn);
// fallthrough
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
CGF.Builder.CreateStore(CastExn, ParamAddr);
return;
case Qualifiers::OCL_Weak:
CGF.EmitARCInitWeak(ParamAddr, CastExn);
return;
}
llvm_unreachable("bad ownership qualifier!");
}
// Otherwise, it returns a pointer into the exception object.
llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType);
LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType);
switch (TEK) {
case TEK_Complex:
CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(srcLV, Loc), destLV,
/*init*/ true);
return;
case TEK_Scalar: {
llvm::Value *ExnLoad = CGF.EmitLoadOfScalar(srcLV, Loc);
CGF.EmitStoreOfScalar(ExnLoad, destLV, /*init*/ true);
return;
}
case TEK_Aggregate:
llvm_unreachable("evaluation kind filtered out!");
}
llvm_unreachable("bad evaluation kind");
}
assert(isa<RecordType>(CatchType) && "unexpected catch type!");
auto catchRD = CatchType->getAsCXXRecordDecl();
CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
// Check for a copy expression. If we don't have a copy expression,
// that means a trivial copy is okay.
const Expr *copyExpr = CatchParam.getInit();
if (!copyExpr) {
llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
caughtExnAlignment);
CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
return;
}
// We have to call __cxa_get_exception_ptr to get the adjusted
// pointer before copying.
llvm::CallInst *rawAdjustedExn =
CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
// Cast that to the appropriate type.
Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
caughtExnAlignment);
// The copy expression is defined in terms of an OpaqueValueExpr.
// Find it and map it to the adjusted expression.
CodeGenFunction::OpaqueValueMapping
opaque(CGF, OpaqueValueExpr::findInCopyConstruct(copyExpr),
CGF.MakeAddrLValue(adjustedExn, CatchParam.getType()));
// Call the copy ctor in a terminate scope.
CGF.EHStack.pushTerminate();
// Perform the copy construction.
CGF.EmitAggExpr(copyExpr,
AggValueSlot::forAddr(ParamAddr, Qualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased));
// Leave the terminate scope.
CGF.EHStack.popTerminate();
// Undo the opaque value mapping.
opaque.pop();
// Finally we can call __cxa_begin_catch.
CallBeginCatch(CGF, Exn, true);
}
/// Begins a catch statement by initializing the catch variable and
/// calling __cxa_begin_catch.
void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
const CXXCatchStmt *S) {
// We have to be very careful with the ordering of cleanups here:
// C++ [except.throw]p4:
// The destruction [of the exception temporary] occurs
// immediately after the destruction of the object declared in
// the exception-declaration in the handler.
//
// So the precise ordering is:
// 1. Construct catch variable.
// 2. __cxa_begin_catch
// 3. Enter __cxa_end_catch cleanup
// 4. Enter dtor cleanup
//
// We do this by using a slightly abnormal initialization process.
// Delegation sequence:
// - ExitCXXTryStmt opens a RunCleanupsScope
// - EmitAutoVarAlloca creates the variable and debug info
// - InitCatchParam initializes the variable from the exception
// - CallBeginCatch calls __cxa_begin_catch
// - CallBeginCatch enters the __cxa_end_catch cleanup
// - EmitAutoVarCleanups enters the variable destructor cleanup
// - EmitCXXTryStmt emits the code for the catch body
// - EmitCXXTryStmt closes the RunCleanupsScope
VarDecl *CatchParam = S->getExceptionDecl();
if (!CatchParam) {
llvm::Value *Exn = CGF.getExceptionFromSlot();
CallBeginCatch(CGF, Exn, true);
return;
}
// Emit the local.
CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart());
CGF.EmitAutoVarCleanups(var);
}
/// Get or define the following function:
/// void @__clang_call_terminate(i8* %exn) nounwind noreturn
/// This code is used only in C++.
static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
llvm::FunctionType *fnTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
llvm::Constant *fnRef = CGM.CreateRuntimeFunction(
fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
if (fn && fn->empty()) {
fn->setDoesNotThrow();
fn->setDoesNotReturn();
// What we really want is to massively penalize inlining without
// forbidding it completely. The difference between that and
// 'noinline' is negligible.
fn->addFnAttr(llvm::Attribute::NoInline);
// Allow this function to be shared across translation units, but
// we don't want it to turn into an exported symbol.
fn->setLinkage(llvm::Function::LinkOnceODRLinkage);
fn->setVisibility(llvm::Function::HiddenVisibility);
if (CGM.supportsCOMDAT())
fn->setComdat(CGM.getModule().getOrInsertComdat(fn->getName()));
// Set up the function.
llvm::BasicBlock *entry =
llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
CGBuilderTy builder(CGM, entry);
// Pull the exception pointer out of the parameter list.
llvm::Value *exn = &*fn->arg_begin();
// Call __cxa_begin_catch(exn).
llvm::CallInst *catchCall = builder.CreateCall(getBeginCatchFn(CGM), exn);
catchCall->setDoesNotThrow();
catchCall->setCallingConv(CGM.getRuntimeCC());
// Call std::terminate().
llvm::CallInst *termCall = builder.CreateCall(CGM.getTerminateFn());
termCall->setDoesNotThrow();
termCall->setDoesNotReturn();
termCall->setCallingConv(CGM.getRuntimeCC());
// std::terminate cannot return.
builder.CreateUnreachable();
}
return fnRef;
}
llvm::CallInst *
ItaniumCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) {
// In C++, we want to call __cxa_begin_catch() before terminating.
if (Exn) {
assert(CGF.CGM.getLangOpts().CPlusPlus);
return CGF.EmitNounwindRuntimeCall(getClangCallTerminateFn(CGF.CGM), Exn);
}
return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp (revision 322855)
@@ -1,4245 +1,4243 @@
//===--- MicrosoftCXXABI.cpp - Emit LLVM Code from ASTs for a Module ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides C++ code generation targeting the Microsoft Visual C++ ABI.
// The class in this file generates structures that follow the Microsoft
// Visual C++ ABI, which is not well documented outside of Microsoft.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGVTables.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
#include "TargetInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/VTableBuilder.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
using namespace clang;
using namespace CodeGen;
namespace {
/// Holds all the vbtable globals for a given class.
struct VBTableGlobals {
const VPtrInfoVector *VBTables;
SmallVector<llvm::GlobalVariable *, 2> Globals;
};
class MicrosoftCXXABI : public CGCXXABI {
public:
MicrosoftCXXABI(CodeGenModule &CGM)
: CGCXXABI(CGM), BaseClassDescriptorType(nullptr),
ClassHierarchyDescriptorType(nullptr),
CompleteObjectLocatorType(nullptr), CatchableTypeType(nullptr),
ThrowInfoType(nullptr) {}
bool HasThisReturn(GlobalDecl GD) const override;
bool hasMostDerivedReturn(GlobalDecl GD) const override;
bool classifyReturnType(CGFunctionInfo &FI) const override;
RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override;
bool isSRetParameterAfterThis() const override { return true; }
bool isThisCompleteObject(GlobalDecl GD) const override {
// The Microsoft ABI doesn't use separate complete-object vs.
// base-object variants of constructors, but it does of destructors.
if (isa<CXXDestructorDecl>(GD.getDecl())) {
switch (GD.getDtorType()) {
case Dtor_Complete:
case Dtor_Deleting:
return true;
case Dtor_Base:
return false;
case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
// No other kinds.
return false;
}
size_t getSrcArgforCopyCtor(const CXXConstructorDecl *CD,
FunctionArgList &Args) const override {
assert(Args.size() >= 2 &&
"expected the arglist to have at least two args!");
// The 'most_derived' parameter goes second if the ctor is variadic and
// has v-bases.
if (CD->getParent()->getNumVBases() > 0 &&
CD->getType()->castAs<FunctionProtoType>()->isVariadic())
return 2;
return 1;
}
std::vector<CharUnits> getVBPtrOffsets(const CXXRecordDecl *RD) override {
std::vector<CharUnits> VBPtrOffsets;
const ASTContext &Context = getContext();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (const std::unique_ptr<VPtrInfo> &VBT : *VBGlobals.VBTables) {
const ASTRecordLayout &SubobjectLayout =
Context.getASTRecordLayout(VBT->IntroducingObject);
CharUnits Offs = VBT->NonVirtualOffset;
Offs += SubobjectLayout.getVBPtrOffset();
if (VBT->getVBaseWithVPtr())
Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
VBPtrOffsets.push_back(Offs);
}
llvm::array_pod_sort(VBPtrOffsets.begin(), VBPtrOffsets.end());
return VBPtrOffsets;
}
StringRef GetPureVirtualCallName() override { return "_purecall"; }
StringRef GetDeletedVirtualCallName() override { return "_purecall"; }
void emitVirtualObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE,
Address Ptr, QualType ElementType,
const CXXDestructorDecl *Dtor) override;
void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD,
const VPtrInfo &Info);
llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
CatchTypeInfo
getAddrOfCXXCatchHandlerType(QualType Ty, QualType CatchHandlerType) override;
/// MSVC needs an extra flag to indicate a catchall.
CatchTypeInfo getCatchAllTypeInfo() override {
return CatchTypeInfo{nullptr, 0x40};
}
bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
void EmitBadTypeidCall(CodeGenFunction &CGF) override;
llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) override;
bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) override;
llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
llvm::BasicBlock *CastEnd) override;
llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) override;
bool EmitBadCastCall(CodeGenFunction &CGF) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override {
return false;
}
llvm::Value *
GetVirtualBaseClassOffset(CodeGenFunction &CGF, Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) override;
llvm::BasicBlock *
EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) override;
llvm::BasicBlock *
EmitDtorCompleteObjectHandler(CodeGenFunction &CGF);
void initializeHiddenVirtualInheritanceMembers(CodeGenFunction &CGF,
const CXXRecordDecl *RD) override;
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
// Background on MSVC destructors
// ==============================
//
// Both Itanium and MSVC ABIs have destructor variants. The variant names
// roughly correspond in the following way:
// Itanium Microsoft
// Base -> no name, just ~Class
// Complete -> vbase destructor
// Deleting -> scalar deleting destructor
// vector deleting destructor
//
// The base and complete destructors are the same as in Itanium, although the
// complete destructor does not accept a VTT parameter when there are virtual
// bases. A separate mechanism involving vtordisps is used to ensure that
// virtual methods of destroyed subobjects are not called.
//
// The deleting destructors accept an i32 bitfield as a second parameter. Bit
// 1 indicates if the memory should be deleted. Bit 2 indicates if the this
// pointer points to an array. The scalar deleting destructor assumes that
// bit 2 is zero, and therefore does not contain a loop.
//
// For virtual destructors, only one entry is reserved in the vftable, and it
// always points to the vector deleting destructor. The vector deleting
// destructor is the most general, so it can be used to destroy objects in
// place, delete single heap objects, or delete arrays.
//
// A TU defining a non-inline destructor is only guaranteed to emit a base
// destructor, and all of the other variants are emitted on an as-needed basis
// in COMDATs. Because a non-base destructor can be emitted in a TU that
// lacks a definition for the destructor, non-base destructors must always
// delegate to or alias the base destructor.
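// For example (illustrative, not from the source), for
//   struct S { virtual ~S(); };
//   delete p;     // scalar deleting dtor, implicit flags argument = 1
//   delete[] q;   // vector deleting dtor, implicit flags argument = 3
// where bit 1 requests the operator delete call and bit 2 marks an array
// 'this'.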
AddedStructorArgs
buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) override;
/// Non-base dtors should be emitted as delegating thunks in this ABI.
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
return DT != Dtor_Base;
}
void EmitCXXDestructors(const CXXDestructorDecl *D) override;
const CXXRecordDecl *
getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override {
MD = MD->getCanonicalDecl();
if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) {
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD);
// The vbases might be ordered differently in the final overrider object
// and the complete object, so the "this" argument may sometimes point to
// memory that has no particular type (e.g. past the complete object).
// In this case, we just use a generic pointer type.
// FIXME: might want to have a more precise type in the non-virtual
// multiple inheritance case.
if (ML.VBase || !ML.VFPtrOffset.isZero())
return nullptr;
}
return MD->getParent();
}
Address
adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD,
Address This,
bool VirtualCall) override;
void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
FunctionArgList &Params) override;
llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override;
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) override;
void emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable);
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
bool isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) override;
/// Don't initialize vptrs if dynamic class
/// is marked with the 'novtable' attribute.
bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override {
return !VTableClass->hasAttr<MSNoVTableAttr>();
}
llvm::Constant *
getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase) override;
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) override;
CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
Address This, llvm::Type *Ty,
SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType,
Address This,
const CXXMemberCallExpr *CE) override;
void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
CallArgList &CallArgs) override {
assert(GD.getDtorType() == Dtor_Deleting &&
"Only deleting destructor thunks are available in this ABI");
CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)),
getContext().IntTy);
}
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
llvm::GlobalVariable *
getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable::LinkageTypes Linkage);
llvm::GlobalVariable *
getAddrOfVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
const CXXRecordDecl *DstRD) {
SmallString<256> OutName;
llvm::raw_svector_ostream Out(OutName);
getMangleContext().mangleCXXVirtualDisplacementMap(SrcRD, DstRD, Out);
StringRef MangledName = OutName.str();
if (auto *VDispMap = CGM.getModule().getNamedGlobal(MangledName))
return VDispMap;
MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
unsigned NumEntries = 1 + SrcRD->getNumVBases();
SmallVector<llvm::Constant *, 4> Map(NumEntries,
llvm::UndefValue::get(CGM.IntTy));
Map[0] = llvm::ConstantInt::get(CGM.IntTy, 0);
bool AnyDifferent = false;
for (const auto &I : SrcRD->vbases()) {
const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
if (!DstRD->isVirtuallyDerivedFrom(VBase))
continue;
unsigned SrcVBIndex = VTContext.getVBTableIndex(SrcRD, VBase);
unsigned DstVBIndex = VTContext.getVBTableIndex(DstRD, VBase);
Map[SrcVBIndex] = llvm::ConstantInt::get(CGM.IntTy, DstVBIndex * 4);
AnyDifferent |= SrcVBIndex != DstVBIndex;
}
// This map would be useless; don't use it.
if (!AnyDifferent)
return nullptr;
llvm::ArrayType *VDispMapTy = llvm::ArrayType::get(CGM.IntTy, Map.size());
llvm::Constant *Init = llvm::ConstantArray::get(VDispMapTy, Map);
llvm::GlobalValue::LinkageTypes Linkage =
SrcRD->isExternallyVisible() && DstRD->isExternallyVisible()
? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::InternalLinkage;
auto *VDispMap = new llvm::GlobalVariable(
CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage,
/*Initializer=*/Init, MangledName);
return VDispMap;
}
void emitVBTableDefinition(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable *GV) const;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
GlobalDecl GD, bool ReturnAdjustment) override {
// Never dllimport/dllexport thunks.
Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
GVALinkage Linkage =
getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl()));
if (Linkage == GVA_Internal)
Thunk->setLinkage(llvm::GlobalValue::InternalLinkage);
else if (ReturnAdjustment)
Thunk->setLinkage(llvm::GlobalValue::WeakODRLinkage);
else
Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) override;
void EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
bool usesThreadWrapperFunction() const override { return false; }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
bool PerformInit) override;
void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Dtor, llvm::Constant *Addr) override;
// ==== Notes on array cookies =========
//
// MSVC seems to only use cookies when the class has a destructor; a
// two-argument usual array deallocation function isn't sufficient.
//
// For example, this code prints "100" and "1":
// struct A {
// char x;
// void *operator new[](size_t sz) {
// printf("%u\n", sz);
// return malloc(sz);
// }
// void operator delete[](void *p, size_t sz) {
// printf("%u\n", sz);
// free(p);
// }
// };
// int main() {
// A *p = new A[100];
// delete[] p;
// }
// Whereas it prints "104" and "104" if you give A a destructor.
bool requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) override;
bool requiresArrayCookie(const CXXNewExpr *expr) override;
CharUnits getArrayCookieSizeImpl(QualType type) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) override;
friend struct MSRTTIBuilder;
bool isImageRelative() const {
return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64;
}
// 5 routines for constructing the llvm types for MS RTTI structs.
llvm::StructType *getTypeDescriptorType(StringRef TypeInfoString) {
llvm::SmallString<32> TDTypeName("rtti.TypeDescriptor");
TDTypeName += llvm::utostr(TypeInfoString.size());
llvm::StructType *&TypeDescriptorType =
TypeDescriptorTypeMap[TypeInfoString.size()];
if (TypeDescriptorType)
return TypeDescriptorType;
llvm::Type *FieldTypes[] = {
CGM.Int8PtrPtrTy,
CGM.Int8PtrTy,
llvm::ArrayType::get(CGM.Int8Ty, TypeInfoString.size() + 1)};
TypeDescriptorType =
llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, TDTypeName);
return TypeDescriptorType;
}
llvm::Type *getImageRelativeType(llvm::Type *PtrType) {
if (!isImageRelative())
return PtrType;
return CGM.IntTy;
}
llvm::StructType *getBaseClassDescriptorType() {
if (BaseClassDescriptorType)
return BaseClassDescriptorType;
llvm::Type *FieldTypes[] = {
getImageRelativeType(CGM.Int8PtrTy),
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()),
};
BaseClassDescriptorType = llvm::StructType::create(
CGM.getLLVMContext(), FieldTypes, "rtti.BaseClassDescriptor");
return BaseClassDescriptorType;
}
llvm::StructType *getClassHierarchyDescriptorType() {
if (ClassHierarchyDescriptorType)
return ClassHierarchyDescriptorType;
// Forward-declare RTTIClassHierarchyDescriptor to break a cycle.
ClassHierarchyDescriptorType = llvm::StructType::create(
CGM.getLLVMContext(), "rtti.ClassHierarchyDescriptor");
llvm::Type *FieldTypes[] = {
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(
getBaseClassDescriptorType()->getPointerTo()->getPointerTo()),
};
ClassHierarchyDescriptorType->setBody(FieldTypes);
return ClassHierarchyDescriptorType;
}
llvm::StructType *getCompleteObjectLocatorType() {
if (CompleteObjectLocatorType)
return CompleteObjectLocatorType;
CompleteObjectLocatorType = llvm::StructType::create(
CGM.getLLVMContext(), "rtti.CompleteObjectLocator");
llvm::Type *FieldTypes[] = {
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(CGM.Int8PtrTy),
getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()),
getImageRelativeType(CompleteObjectLocatorType),
};
llvm::ArrayRef<llvm::Type *> FieldTypesRef(FieldTypes);
if (!isImageRelative())
FieldTypesRef = FieldTypesRef.drop_back();
CompleteObjectLocatorType->setBody(FieldTypesRef);
return CompleteObjectLocatorType;
}
llvm::GlobalVariable *getImageBase() {
StringRef Name = "__ImageBase";
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name))
return GV;
return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true,
llvm::GlobalValue::ExternalLinkage,
/*Initializer=*/nullptr, Name);
}
llvm::Constant *getImageRelativeConstant(llvm::Constant *PtrVal) {
if (!isImageRelative())
return PtrVal;
if (PtrVal->isNullValue())
return llvm::Constant::getNullValue(CGM.IntTy);
llvm::Constant *ImageBaseAsInt =
llvm::ConstantExpr::getPtrToInt(getImageBase(), CGM.IntPtrTy);
llvm::Constant *PtrValAsInt =
llvm::ConstantExpr::getPtrToInt(PtrVal, CGM.IntPtrTy);
llvm::Constant *Diff =
llvm::ConstantExpr::getSub(PtrValAsInt, ImageBaseAsInt,
/*HasNUW=*/true, /*HasNSW=*/true);
return llvm::ConstantExpr::getTrunc(Diff, CGM.IntTy);
}
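// Illustrative sketch (hypothetical addresses): with a 64-bit image based at
// 0x140000000, a descriptor placed at 0x140012340 is stored as the i32 RVA
//   0x140012340 - 0x140000000 = 0x12340
// keeping RTTI fields 4 bytes wide regardless of pointer size; consumers add
// the address of __ImageBase back to recover the full pointer.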
private:
MicrosoftMangleContext &getMangleContext() {
return cast<MicrosoftMangleContext>(CodeGen::CGCXXABI::getMangleContext());
}
llvm::Constant *getZeroInt() {
return llvm::ConstantInt::get(CGM.IntTy, 0);
}
llvm::Constant *getAllOnesInt() {
return llvm::Constant::getAllOnesValue(CGM.IntTy);
}
CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) override;
void
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields);
/// \brief Shared code for virtual base adjustment. Returns the offset from
/// the vbptr to the virtual base. Optionally returns the address of the
/// vbptr itself.
llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address Base,
llvm::Value *VBPtrOffset,
llvm::Value *VBTableOffset,
llvm::Value **VBPtr = nullptr);
llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address Base,
int32_t VBPtrOffset,
int32_t VBTableOffset,
llvm::Value **VBPtr = nullptr) {
assert(VBTableOffset % 4 == 0 && "should be byte offset into table of i32s");
llvm::Value *VBPOffset = llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
*VBTOffset = llvm::ConstantInt::get(CGM.IntTy, VBTableOffset);
return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr);
}
std::pair<Address, llvm::Value *>
performBaseAdjustment(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy);
/// \brief Performs a full virtual base adjustment. Used to dereference
/// pointers to members of virtual bases.
llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E,
const CXXRecordDecl *RD, Address Base,
llvm::Value *VirtualBaseAdjustmentOffset,
llvm::Value *VBPtrOffset /* optional */);
/// \brief Emits a full member pointer with the fields common to data and
/// function member pointers.
llvm::Constant *EmitFullMemberPointer(llvm::Constant *FirstField,
bool IsMemberFunction,
const CXXRecordDecl *RD,
CharUnits NonVirtualBaseAdjustment,
unsigned VBTableIndex);
bool MemberPointerConstantIsNull(const MemberPointerType *MPT,
llvm::Constant *MP);
/// \brief - Initialize all vbptrs of 'this' with RD as the complete type.
void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD);
/// \brief Caching wrapper around VBTableBuilder::enumerateVBTables().
const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD);
/// \brief Generate a thunk for calling a virtual member function MD.
llvm::Function *EmitVirtualMemPtrThunk(
const CXXMethodDecl *MD,
const MicrosoftVTableContext::MethodVFTableLocation &ML);
public:
llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
bool isZeroInitializable(const MemberPointerType *MPT) override;
bool isMemberPointerConvertible(const MemberPointerType *MPT) const override {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
return RD->hasAttr<MSInheritanceAttr>();
}
llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) override;
llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) override;
llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *
EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *EmitNonNullMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
CastKind CK, CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
CGBuilderTy &Builder);
llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) override;
llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) override;
llvm::Constant *EmitMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
CastKind CK, CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Constant *Src);
CGCallee
EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, const Expr *E,
Address This, llvm::Value *&ThisPtrForCall,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
llvm::StructType *getCatchableTypeType() {
if (CatchableTypeType)
return CatchableTypeType;
llvm::Type *FieldTypes[] = {
CGM.IntTy, // Flags
getImageRelativeType(CGM.Int8PtrTy), // TypeDescriptor
CGM.IntTy, // NonVirtualAdjustment
CGM.IntTy, // OffsetToVBPtr
CGM.IntTy, // VBTableIndex
CGM.IntTy, // Size
getImageRelativeType(CGM.Int8PtrTy) // CopyCtor
};
CatchableTypeType = llvm::StructType::create(
CGM.getLLVMContext(), FieldTypes, "eh.CatchableType");
return CatchableTypeType;
}
llvm::StructType *getCatchableTypeArrayType(uint32_t NumEntries) {
llvm::StructType *&CatchableTypeArrayType =
CatchableTypeArrayTypeMap[NumEntries];
if (CatchableTypeArrayType)
return CatchableTypeArrayType;
llvm::SmallString<23> CTATypeName("eh.CatchableTypeArray.");
CTATypeName += llvm::utostr(NumEntries);
llvm::Type *CTType =
getImageRelativeType(getCatchableTypeType()->getPointerTo());
llvm::Type *FieldTypes[] = {
CGM.IntTy, // NumEntries
llvm::ArrayType::get(CTType, NumEntries) // CatchableTypes
};
CatchableTypeArrayType =
llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, CTATypeName);
return CatchableTypeArrayType;
}
llvm::StructType *getThrowInfoType() {
if (ThrowInfoType)
return ThrowInfoType;
llvm::Type *FieldTypes[] = {
CGM.IntTy, // Flags
getImageRelativeType(CGM.Int8PtrTy), // CleanupFn
getImageRelativeType(CGM.Int8PtrTy), // ForwardCompat
getImageRelativeType(CGM.Int8PtrTy) // CatchableTypeArray
};
ThrowInfoType = llvm::StructType::create(CGM.getLLVMContext(), FieldTypes,
"eh.ThrowInfo");
return ThrowInfoType;
}
llvm::Constant *getThrowFn() {
// _CxxThrowException is passed an exception object and a ThrowInfo object
// which describes the exception.
llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()};
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
auto *Fn = cast<llvm::Function>(
CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"));
// _CxxThrowException is stdcall on 32-bit x86 platforms.
if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86)
Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
return Fn;
}
llvm::Function *getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
llvm::Constant *getCatchableType(QualType T,
uint32_t NVOffset = 0,
int32_t VBPtrOffset = -1,
uint32_t VBIndex = 0);
llvm::GlobalVariable *getCatchableTypeArray(QualType T);
llvm::GlobalVariable *getThrowInfo(QualType T) override;
private:
typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalValue *> VFTablesMapTy;
/// \brief All the vftables that have been referenced.
VFTablesMapTy VFTablesMap;
VTablesMapTy VTablesMap;
/// \brief This set holds the record decls we've deferred vtable emission for.
llvm::SmallPtrSet<const CXXRecordDecl *, 4> DeferredVFTables;
/// \brief All the vbtables which have been referenced.
llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap;
/// Info on the global variable used to guard initialization of static locals.
/// The BitIndex field is only used for externally invisible declarations.
struct GuardInfo {
GuardInfo() : Guard(nullptr), BitIndex(0) {}
llvm::GlobalVariable *Guard;
unsigned BitIndex;
};
/// Map from DeclContext to the current guard variable. We assume that the
/// AST is visited in source code order.
llvm::DenseMap<const DeclContext *, GuardInfo> GuardVariableMap;
llvm::DenseMap<const DeclContext *, GuardInfo> ThreadLocalGuardVariableMap;
llvm::DenseMap<const DeclContext *, unsigned> ThreadSafeGuardNumMap;
llvm::DenseMap<size_t, llvm::StructType *> TypeDescriptorTypeMap;
llvm::StructType *BaseClassDescriptorType;
llvm::StructType *ClassHierarchyDescriptorType;
llvm::StructType *CompleteObjectLocatorType;
llvm::DenseMap<QualType, llvm::GlobalVariable *> CatchableTypeArrays;
llvm::StructType *CatchableTypeType;
llvm::DenseMap<uint32_t, llvm::StructType *> CatchableTypeArrayTypeMap;
llvm::StructType *ThrowInfoType;
};
}
CGCXXABI::RecordArgABI
MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
switch (CGM.getTarget().getTriple().getArch()) {
default:
// FIXME: Implement for other architectures.
return RAA_Default;
case llvm::Triple::thumb:
// Use the simple Itanium rules for now.
// FIXME: This is incompatible with MSVC for arguments with a dtor and no
// copy ctor.
return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default;
case llvm::Triple::x86:
// All record arguments are passed in memory on x86. Decide whether to
// construct the object directly in argument memory, or to construct the
// argument elsewhere and copy the bytes during the call.
// If C++ prohibits us from making a copy, construct the arguments directly
// into argument memory.
if (!canCopyArgument(RD))
return RAA_DirectInMemory;
// Otherwise, construct the argument into a temporary and copy the bytes
// into the outgoing argument memory.
return RAA_Default;
case llvm::Triple::x86_64:
- // Win64 passes objects with non-trivial copy ctors indirectly.
- if (RD->hasNonTrivialCopyConstructor())
- return RAA_Indirect;
-
- // If an object has a destructor, we'd really like to pass it indirectly
+ // If a class has a destructor, we'd really like to pass it indirectly
// because it allows us to elide copies. Unfortunately, MSVC makes that
// impossible for small types, which it will pass in a single register or
// stack slot. Most objects with dtors are large-ish, so handle that early.
// We can't call out all large objects as being indirect because there are
// multiple x64 calling conventions and the C++ ABI code shouldn't dictate
// how we pass large POD types.
+ //
+ // Note: This permits small classes with nontrivial destructors to be
+ // passed in registers, which is non-conforming.
if (RD->hasNonTrivialDestructor() &&
getContext().getTypeSize(RD->getTypeForDecl()) > 64)
return RAA_Indirect;
- // If this is true, the implicit copy constructor that Sema would have
- // created would not be deleted. FIXME: We should provide a more direct way
- // for CodeGen to ask whether the constructor was deleted.
- if (!RD->hasUserDeclaredCopyConstructor() &&
- !RD->hasUserDeclaredMoveConstructor() &&
- !RD->needsOverloadResolutionForMoveConstructor() &&
- !RD->hasUserDeclaredMoveAssignment() &&
- !RD->needsOverloadResolutionForMoveAssignment())
- return RAA_Default;
-
- // Otherwise, Sema should have created an implicit copy constructor if
- // needed.
- assert(!RD->needsImplicitCopyConstructor());
-
- // We have to make sure the trivial copy constructor isn't deleted.
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy ctor. Return directly.
- if (!CD->isDeleted())
- return RAA_Default;
+ // If a class has at least one non-deleted, trivial copy constructor, it
+ // is passed according to the C ABI. Otherwise, it is passed indirectly.
+ //
+ // Note: This permits classes with non-trivial copy or move ctors to be
+ // passed in registers, so long as they *also* have a trivial copy ctor,
+ // which is non-conforming.
+ if (RD->needsImplicitCopyConstructor()) {
+ // If the copy ctor has not yet been declared, we can read its triviality
+ // off the AST.
+ if (!RD->defaultedCopyConstructorIsDeleted() &&
+ RD->hasTrivialCopyConstructor())
+ return RAA_Default;
+ } else {
+ // Otherwise, we need to find the copy constructor(s) and ask.
+ for (const CXXConstructorDecl *CD : RD->ctors()) {
+ if (CD->isCopyConstructor()) {
+ // We had at least one nondeleted trivial copy ctor. Return directly.
+ if (!CD->isDeleted() && CD->isTrivial())
+ return RAA_Default;
+ }
}
}
- // The trivial copy constructor was deleted. Return indirectly.
+ // We have no trivial, non-deleted copy constructor.
return RAA_Indirect;
}
llvm_unreachable("invalid enum");
}
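// Editorial sketch (not part of this change): how the revised x86_64 rules
// above classify a few hypothetical records. 'Small' has only trivial special
// members, so it takes the C ABI path; 'Big' has a non-trivial dtor and is
// wider than 64 bits, so it is passed indirectly; 'Mixed' is the
// non-conforming case noted above, where a trivial copy ctor coexists with a
// non-trivial move ctor and the class is still passed directly.
struct Small { int x; };               // RAA_Default
struct Big { char buf[16]; ~Big(); };  // RAA_Indirect: dtor and size > 64 bits
struct Mixed {
  Mixed(const Mixed &) = default;      // trivial, non-deleted copy ctor...
  Mixed(Mixed &&) noexcept {}          // ...beside a non-trivial move ctor
  int x;
};                                     // RAA_Default (non-conforming)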
void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
const CXXDeleteExpr *DE,
Address Ptr,
QualType ElementType,
const CXXDestructorDecl *Dtor) {
// FIXME: Provide a source location here even though there's no
// CXXMemberCallExpr for dtor call.
bool UseGlobalDelete = DE->isGlobalDelete();
CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
llvm::Value *MDThis =
EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
if (UseGlobalDelete)
CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
}
void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
llvm::Value *Args[] = {
llvm::ConstantPointerNull::get(CGM.Int8PtrTy),
llvm::ConstantPointerNull::get(getThrowInfoType()->getPointerTo())};
auto *Fn = getThrowFn();
if (isNoReturn)
CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args);
else
CGF.EmitRuntimeCallOrInvoke(Fn, Args);
}
namespace {
struct CatchRetScope final : EHScopeStack::Cleanup {
llvm::CatchPadInst *CPI;
CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest");
CGF.Builder.CreateCatchRet(CPI, BB);
CGF.EmitBlock(BB);
}
};
}
void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF,
const CXXCatchStmt *S) {
// In the MS ABI, the runtime handles the copy, and the catch handler is
// responsible for destruction.
VarDecl *CatchParam = S->getExceptionDecl();
llvm::BasicBlock *CatchPadBB = CGF.Builder.GetInsertBlock();
llvm::CatchPadInst *CPI =
cast<llvm::CatchPadInst>(CatchPadBB->getFirstNonPHI());
CGF.CurrentFuncletPad = CPI;
// If this is a catch-all or the catch parameter is unnamed, we don't need to
// emit an alloca for the object.
if (!CatchParam || !CatchParam->getDeclName()) {
CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
return;
}
CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
CPI->setArgOperand(2, var.getObjectAddress(CGF).getPointer());
CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
CGF.EmitAutoVarCleanups(var);
}
/// We need to perform a generic polymorphic operation (like a typeid
/// or a cast), which requires an object with a vfptr. Adjust the
/// address to point to an object with a vfptr.
std::pair<Address, llvm::Value *>
MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy) {
Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy);
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
const ASTContext &Context = getContext();
// If the class itself has a vfptr, great. This check implicitly
// covers non-virtual base subobjects: a class with its own virtual
// functions would be a candidate to be a primary base.
if (Context.getASTRecordLayout(SrcDecl).hasExtendableVFPtr())
return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0));
// Okay, one of the vbases must have a vfptr, or else this isn't
// actually a polymorphic class.
const CXXRecordDecl *PolymorphicBase = nullptr;
for (auto &Base : SrcDecl->vbases()) {
const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
if (Context.getASTRecordLayout(BaseDecl).hasExtendableVFPtr()) {
PolymorphicBase = BaseDecl;
break;
}
}
assert(PolymorphicBase && "polymorphic class has no apparent vfptr?");
llvm::Value *Offset =
GetVirtualBaseClassOffset(CGF, Value, SrcDecl, PolymorphicBase);
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset);
CharUnits VBaseAlign =
CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase);
return std::make_pair(Address(Ptr, VBaseAlign), Offset);
}
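// Editorial sketch (hypothetical types): a hierarchy that exercises the vbase
// search above. 'D' declares no virtual functions of its own, so it has no
// vfptr at offset zero; its only vfptr lives in the virtual base 'B', and a
// typeid or dynamic_cast on a D* must first be adjusted to the B subobject.
struct B { virtual void f(); };
struct D : virtual B { int x; };  // hasExtendableVFPtr() is false for D itself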
bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
QualType SrcRecordTy) {
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
return IsDeref &&
!getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
}
static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF,
llvm::Value *Argument) {
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false);
llvm::Value *Args[] = {Argument};
llvm::Constant *Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid");
return CGF.EmitRuntimeCallOrInvoke(Fn, Args);
}
void MicrosoftCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) {
llvm::CallSite Call =
emitRTtypeidCall(CGF, llvm::Constant::getNullValue(CGM.VoidPtrTy));
Call.setDoesNotReturn();
CGF.Builder.CreateUnreachable();
}
llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF,
QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
std::tie(ThisPtr, std::ignore) =
performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction();
return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy);
}
bool MicrosoftCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) {
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
return SrcIsPtr &&
!getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
}
llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall(
CodeGenFunction &CGF, Address This, QualType SrcRecordTy,
QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) {
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
llvm::Value *SrcRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType());
llvm::Value *DestRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType());
llvm::Value *Offset;
std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy);
llvm::Value *ThisPtr = This.getPointer();
Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
// PVOID __RTDynamicCast(
// PVOID inptr,
// LONG VfDelta,
// PVOID SrcType,
// PVOID TargetType,
// BOOL isReference)
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy, CGF.Int32Ty, CGF.Int8PtrTy,
CGF.Int8PtrTy, CGF.Int32Ty};
llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
"__RTDynamicCast");
llvm::Value *Args[] = {
ThisPtr, Offset, SrcRTTI, DestRTTI,
llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())};
ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args).getInstruction();
return CGF.Builder.CreateBitCast(ThisPtr, DestLTy);
}
llvm::Value *
MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) {
std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy);
// PVOID __RTCastToVoid(
// PVOID inptr)
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
"__RTCastToVoid");
llvm::Value *Args[] = {Value.getPointer()};
return CGF.EmitRuntimeCall(Function, Args);
}
bool MicrosoftCXXABI::EmitBadCastCall(CodeGenFunction &CGF) {
return false;
}
llvm::Value *MicrosoftCXXABI::GetVirtualBaseClassOffset(
CodeGenFunction &CGF, Address This, const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) {
const ASTContext &Context = getContext();
int64_t VBPtrChars =
Context.getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity();
llvm::Value *VBPtrOffset = llvm::ConstantInt::get(CGM.PtrDiffTy, VBPtrChars);
CharUnits IntSize = Context.getTypeSizeInChars(Context.IntTy);
CharUnits VBTableChars =
IntSize *
CGM.getMicrosoftVTableContext().getVBTableIndex(ClassDecl, BaseClassDecl);
llvm::Value *VBTableOffset =
llvm::ConstantInt::get(CGM.IntTy, VBTableChars.getQuantity());
llvm::Value *VBPtrToNewBase =
GetVBaseOffsetFromVBPtr(CGF, This, VBPtrOffset, VBTableOffset);
VBPtrToNewBase =
CGF.Builder.CreateSExtOrBitCast(VBPtrToNewBase, CGM.PtrDiffTy);
return CGF.Builder.CreateNSWAdd(VBPtrOffset, VBPtrToNewBase);
}
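// Editorial sketch: the arithmetic performed above, spelled out for a
// hypothetical object whose vbptr sits at vbptr_offset and whose target vbase
// has vbtable index 2 (4-byte i32 slots):
//   int *vbtable     = *(int **)((char *)this + vbptr_offset);
//   int  vbase_delta = vbtable[2];                 // GetVBaseOffsetFromVBPtr
//   ptrdiff_t total  = vbptr_offset + vbase_delta; // the NSW add above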
bool MicrosoftCXXABI::HasThisReturn(GlobalDecl GD) const {
return isa<CXXConstructorDecl>(GD.getDecl());
}
static bool isDeletingDtor(GlobalDecl GD) {
return isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() == Dtor_Deleting;
}
bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const {
return isDeletingDtor(GD);
}
bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
if (!RD)
return false;
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
if (FI.isInstanceMethod()) {
// If it's an instance method, aggregates are always returned indirectly via
// the second parameter.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
return true;
} else if (!RD->isPOD()) {
// If it's a free function, non-POD types are returned indirectly.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
return true;
}
// Otherwise, use the C ABI rules.
return false;
}
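// Editorial sketch (hypothetical types): the three outcomes of
// classifyReturnType above.
//   struct POD { int x, y; };
//   POD free_pod();        // false: C ABI, may be returned in registers
//   struct NonPOD { NonPOD(const NonPOD &); int x; };
//   NonPOD free_nonpod();  // true: returned indirectly via hidden pointer
//   struct S { POD m(); }; // true: instance method, sret placed after 'this'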
llvm::BasicBlock *
MicrosoftCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
llvm::Value *IsMostDerivedClass = getStructorImplicitParamValue(CGF);
assert(IsMostDerivedClass &&
"ctor for a class with virtual bases must have an implicit parameter");
llvm::Value *IsCompleteObject =
CGF.Builder.CreateIsNotNull(IsMostDerivedClass, "is_complete_object");
llvm::BasicBlock *CallVbaseCtorsBB = CGF.createBasicBlock("ctor.init_vbases");
llvm::BasicBlock *SkipVbaseCtorsBB = CGF.createBasicBlock("ctor.skip_vbases");
CGF.Builder.CreateCondBr(IsCompleteObject,
CallVbaseCtorsBB, SkipVbaseCtorsBB);
CGF.EmitBlock(CallVbaseCtorsBB);
// Fill in the vbtable pointers here.
EmitVBPtrStores(CGF, RD);
// CGF will put the base ctor calls in this basic block for us later.
return SkipVbaseCtorsBB;
}
llvm::BasicBlock *
MicrosoftCXXABI::EmitDtorCompleteObjectHandler(CodeGenFunction &CGF) {
llvm::Value *IsMostDerivedClass = getStructorImplicitParamValue(CGF);
assert(IsMostDerivedClass &&
"ctor for a class with virtual bases must have an implicit parameter");
llvm::Value *IsCompleteObject =
CGF.Builder.CreateIsNotNull(IsMostDerivedClass, "is_complete_object");
llvm::BasicBlock *CallVbaseDtorsBB = CGF.createBasicBlock("Dtor.dtor_vbases");
llvm::BasicBlock *SkipVbaseDtorsBB = CGF.createBasicBlock("Dtor.skip_vbases");
CGF.Builder.CreateCondBr(IsCompleteObject,
CallVbaseDtorsBB, SkipVbaseDtorsBB);
CGF.EmitBlock(CallVbaseDtorsBB);
// CGF will put the base dtor calls in this basic block for us later.
return SkipVbaseDtorsBB;
}
void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
CodeGenFunction &CGF, const CXXRecordDecl *RD) {
// In most cases, an override for a vbase virtual method can adjust
// the "this" parameter by applying a constant offset.
// However, this is not enough while a constructor or a destructor of some
// class X is being executed if all the following conditions are met:
// - X has virtual bases, (1)
// - X overrides a virtual method M of a vbase Y, (2)
// - X itself is a vbase of the most derived class.
//
// If (1) and (2) are true, the vtorDisp for vbase Y is a hidden member of X
// which holds the extra amount of "this" adjustment we must do when we use
// the X vftables (i.e. during X ctor or dtor).
// Outside the ctors and dtors, the values of vtorDisps are zero.
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
typedef ASTRecordLayout::VBaseOffsetsMapTy VBOffsets;
const VBOffsets &VBaseMap = Layout.getVBaseOffsetsMap();
CGBuilderTy &Builder = CGF.Builder;
unsigned AS = getThisAddress(CGF).getAddressSpace();
llvm::Value *Int8This = nullptr; // Initialize lazily.
for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end();
I != E; ++I) {
if (!I->second.hasVtorDisp())
continue;
llvm::Value *VBaseOffset =
GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first);
uint64_t ConstantVBaseOffset =
Layout.getVBaseClassOffset(I->first).getQuantity();
// vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase).
llvm::Value *VtorDispValue = Builder.CreateSub(
VBaseOffset, llvm::ConstantInt::get(CGM.PtrDiffTy, ConstantVBaseOffset),
"vtordisp.value");
VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty);
if (!Int8This)
Int8This = Builder.CreateBitCast(getThisValue(CGF),
CGF.Int8Ty->getPointerTo(AS));
llvm::Value *VtorDispPtr = Builder.CreateInBoundsGEP(Int8This, VBaseOffset);
// vtorDisp is always the 32 bits before the vbase in the class layout.
VtorDispPtr = Builder.CreateConstGEP1_32(VtorDispPtr, -4);
VtorDispPtr = Builder.CreateBitCast(
VtorDispPtr, CGF.Int32Ty->getPointerTo(AS), "vtordisp.ptr");
Builder.CreateAlignedStore(VtorDispValue, VtorDispPtr,
CharUnits::fromQuantity(4));
}
}
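// Editorial sketch (hypothetical hierarchy): the vtorDisp conditions
// enumerated above.
//   struct Y { virtual void m(); };               // the vbase declaring m
//   struct X : virtual Y { void m() override; };  // (1)+(2): X carries a
//                                                 // vtordisp slot for Y
//   struct Z : virtual X { };                     // X is itself a vbase of
//                                                 // the most derived class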
static bool hasDefaultCXXMethodCC(ASTContext &Context,
const CXXMethodDecl *MD) {
CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
CallingConv ActualCallingConv =
MD->getType()->getAs<FunctionProtoType>()->getCallConv();
return ExpectedCallingConv == ActualCallingConv;
}
void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
// There's only one constructor type in this ABI.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
// Exported default constructors either have a simple call-site where they use
// the typical calling convention and have a single 'this' pointer for an
// argument, or they get a wrapper function which appropriately thunks to the
// real default constructor. This thunk is the default constructor closure.
if (D->hasAttr<DLLExportAttr>() && D->isDefaultConstructor())
if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) {
llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure);
Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
}
}
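// Editorial sketch (hypothetical type): a constructor taking the closure path
// above. On 32-bit x86 the default method calling convention is __thiscall,
// so an exported default ctor with a different convention, or one that is a
// default ctor only thanks to defaulted parameters, gets the weak, exported
// default-constructor-closure thunk instead of the simple call-site form.
//   struct __declspec(dllexport) S {
//     S(int defaulted = 0);  // default ctor, but getNumParams() != 0
//   };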
void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
Address This = getThisAddress(CGF);
This = CGF.Builder.CreateElementBitCast(This, CGM.Int8Ty, "this.int8");
const ASTContext &Context = getContext();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
const std::unique_ptr<VPtrInfo> &VBT = (*VBGlobals.VBTables)[I];
llvm::GlobalVariable *GV = VBGlobals.Globals[I];
const ASTRecordLayout &SubobjectLayout =
Context.getASTRecordLayout(VBT->IntroducingObject);
CharUnits Offs = VBT->NonVirtualOffset;
Offs += SubobjectLayout.getVBPtrOffset();
if (VBT->getVBaseWithVPtr())
Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
Address VBPtr = CGF.Builder.CreateConstInBoundsByteGEP(This, Offs);
llvm::Value *GVPtr =
CGF.Builder.CreateConstInBoundsGEP2_32(GV->getValueType(), GV, 0, 0);
VBPtr = CGF.Builder.CreateElementBitCast(VBPtr, GVPtr->getType(),
"vbptr." + VBT->ObjectWithVPtr->getName());
CGF.Builder.CreateStore(GVPtr, VBPtr);
}
}
CGCXXABI::AddedStructorArgs
MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
AddedStructorArgs Added;
// TODO: 'for base' flag
if (T == StructorType::Deleting) {
// The scalar deleting destructor takes an implicit int parameter.
ArgTys.push_back(getContext().IntTy);
++Added.Suffix;
}
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
if (!CD)
return Added;
// All parameters are already in place except is_most_derived, which goes
// after 'this' if it's variadic and last if it's not.
const CXXRecordDecl *Class = CD->getParent();
const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>();
if (Class->getNumVBases()) {
if (FPT->isVariadic()) {
ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy);
++Added.Prefix;
} else {
ArgTys.push_back(getContext().IntTy);
++Added.Suffix;
}
}
return Added;
}
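// Editorial sketch (hypothetical type): where the extra int lands for a class
// with virtual bases, per the variadic check above.
//   struct S : virtual B {
//     S(int, ...);  // lowered as S(S *this, int is_most_derived, int, ...)
//     S(int);       // lowered as S(S *this, int, int is_most_derived)
//   };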
void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
// The TU defining a dtor is only guaranteed to emit a base destructor. All
// other destructor variants are delegating thunks.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
}
CharUnits
MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
// Complete destructors take a pointer to the complete object as a
// parameter, thus don't need this adjustment.
if (GD.getDtorType() == Dtor_Complete)
return CharUnits();
// There's no Dtor_Base in the vftable, but it shares the this adjustment with
// the deleting one, so look it up instead.
LookupGD = GlobalDecl(DD, Dtor_Deleting);
}
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
CharUnits Adjustment = ML.VFPtrOffset;
// Normal virtual instance methods need to adjust from the vfptr that first
// defined the virtual method to the virtual base subobject, but destructors
// do not. The vector deleting destructor thunk applies this adjustment for
// us if necessary.
if (isa<CXXDestructorDecl>(MD))
Adjustment = CharUnits::Zero();
if (ML.VBase) {
const ASTRecordLayout &DerivedLayout =
getContext().getASTRecordLayout(MD->getParent());
Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase);
}
return Adjustment;
}
Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall(
CodeGenFunction &CGF, GlobalDecl GD, Address This,
bool VirtualCall) {
if (!VirtualCall) {
// If the call of a virtual function is not virtual, we just have to
// compensate for the adjustment the virtual function does in its prologue.
CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
if (Adjustment.isZero())
return This;
This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty);
assert(Adjustment.isPositive());
return CGF.Builder.CreateConstByteGEP(This, Adjustment);
}
GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
// Complete dtors take a pointer to the complete object,
// thus don't need adjustment.
if (GD.getDtorType() == Dtor_Complete)
return This;
// There's only Dtor_Deleting in the vftable, but it shares the this adjustment
// with the base one, so look up the deleting one instead.
LookupGD = GlobalDecl(DD, Dtor_Deleting);
}
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
CharUnits StaticOffset = ML.VFPtrOffset;
// Base destructors expect 'this' to point to the beginning of the base
// subobject, not the first vfptr that happens to contain the virtual dtor.
// However, we still need to apply the virtual base adjustment.
if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
StaticOffset = CharUnits::Zero();
Address Result = This;
if (ML.VBase) {
Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty);
const CXXRecordDecl *Derived = MD->getParent();
const CXXRecordDecl *VBase = ML.VBase;
llvm::Value *VBaseOffset =
GetVirtualBaseClassOffset(CGF, Result, Derived, VBase);
llvm::Value *VBasePtr =
CGF.Builder.CreateInBoundsGEP(Result.getPointer(), VBaseOffset);
CharUnits VBaseAlign =
CGF.CGM.getVBaseAlignment(Result.getAlignment(), Derived, VBase);
Result = Address(VBasePtr, VBaseAlign);
}
if (!StaticOffset.isZero()) {
assert(StaticOffset.isPositive());
Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty);
if (ML.VBase) {
// Non-virtual adjustment might result in a pointer outside the allocated
// object, e.g. if the final overrider class is laid out after the virtual
// base that declares a method in the most derived class.
// FIXME: Update the code that emits this adjustment in thunks prologues.
Result = CGF.Builder.CreateConstByteGEP(Result, StaticOffset);
} else {
Result = CGF.Builder.CreateConstInBoundsByteGEP(Result, StaticOffset);
}
}
return Result;
}
void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
QualType &ResTy,
FunctionArgList &Params) {
ASTContext &Context = getContext();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
auto *IsMostDerived = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("is_most_derived"), Context.IntTy,
ImplicitParamDecl::Other);
// The 'most_derived' parameter goes second if the ctor is variadic and last
// if it's not. Dtors can't be variadic.
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (FPT->isVariadic())
Params.insert(Params.begin() + 1, IsMostDerived);
else
Params.push_back(IsMostDerived);
getStructorImplicitParamDecl(CGF) = IsMostDerived;
} else if (isDeletingDtor(CGF.CurGD)) {
auto *ShouldDelete = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("should_call_delete"), Context.IntTy,
ImplicitParamDecl::Other);
Params.push_back(ShouldDelete);
getStructorImplicitParamDecl(CGF) = ShouldDelete;
}
}
llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue(
CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
// In this ABI, every virtual function takes a pointer to one of the
// subobjects that first defines it as the 'this' parameter, rather than a
// pointer to the final overrider subobject. Thus, we need to adjust it back
// to the final overrider subobject before use.
// See comments in the MicrosoftVFTableContext implementation for the details.
CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
if (Adjustment.isZero())
return This;
unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS),
*thisTy = This->getType();
This = CGF.Builder.CreateBitCast(This, charPtrTy);
assert(Adjustment.isPositive());
This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
-Adjustment.getQuantity());
return CGF.Builder.CreateBitCast(This, thisTy);
}
void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
// Naked functions have no prolog.
if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
return;
EmitThisParam(CGF);
/// If this is a function that the ABI specifies returns 'this', initialize
/// the return slot to 'this' at the start of the function.
///
/// Unlike the setting of return types, this is done within the ABI
/// implementation instead of by clients of CGCXXABI because:
/// 1) getThisValue is currently protected
/// 2) in theory, an ABI could implement 'this' returns some other way;
/// HasThisReturn only specifies a contract, not the implementation
if (HasThisReturn(CGF.CurGD))
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
else if (hasMostDerivedReturn(CGF.CurGD))
CGF.Builder.CreateStore(CGF.EmitCastToVoidPtr(getThisValue(CGF)),
CGF.ReturnValue);
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
assert(getStructorImplicitParamDecl(CGF) &&
"no implicit parameter for a constructor with virtual bases?");
getStructorImplicitParamValue(CGF)
= CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)),
"is_most_derived");
}
if (isDeletingDtor(CGF.CurGD)) {
assert(getStructorImplicitParamDecl(CGF) &&
"no implicit parameter for a deleting destructor?");
getStructorImplicitParamValue(CGF)
= CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)),
"should_call_delete");
}
}
CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
// Check if we need a 'most_derived' parameter.
if (!D->getParent()->getNumVBases())
return AddedStructorArgs{};
// Add the 'most_derived' argument second if we are variadic or last if not.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
llvm::Value *MostDerivedArg;
if (Delegating) {
MostDerivedArg = getStructorImplicitParamValue(CGF);
} else {
MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
RValue RV = RValue::get(MostDerivedArg);
if (FPT->isVariadic()) {
Args.insert(Args.begin() + 1,
CallArg(RV, getContext().IntTy, /*needscopy=*/false));
return AddedStructorArgs::prefix(1);
}
Args.add(RV, getContext().IntTy);
return AddedStructorArgs::suffix(1);
}
void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) {
CGCallee Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
DD);
if (DD->isVirtual()) {
assert(Type != CXXDtorType::Dtor_Deleting &&
"The deleting destructor should only be called via a virtual call");
This = adjustThisArgumentForVirtualFunctionCall(CGF, GlobalDecl(DD, Type),
This, false);
}
llvm::BasicBlock *BaseDtorEndBB = nullptr;
if (ForVirtualBase && isa<CXXConstructorDecl>(CGF.CurCodeDecl)) {
BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
}
CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(),
/*ImplicitParam=*/nullptr,
/*ImplicitParamTy=*/QualType(), nullptr,
getFromDtorType(Type));
if (BaseDtorEndBB) {
// Complete object handler should continue to be the remaining block.
CGF.Builder.CreateBr(BaseDtorEndBB);
CGF.EmitBlock(BaseDtorEndBB);
}
}
void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable) {
if (!CGM.getCodeGenOpts().LTOUnit)
return;
// The location of the first virtual function pointer in the virtual table,
// aka the "address point" on Itanium. This is at offset 0 if RTTI is
// disabled, or sizeof(void*) if RTTI is enabled.
CharUnits AddressPoint =
getContext().getLangOpts().RTTIData
? getContext().toCharUnitsFromBits(
getContext().getTargetInfo().getPointerWidth(0))
: CharUnits::Zero();
if (Info.PathToIntroducingObject.empty()) {
CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
return;
}
// Add a bitset entry for the least derived base belonging to this vftable.
CGM.AddVTableTypeMetadata(VTable, AddressPoint,
Info.PathToIntroducingObject.back());
// Add a bitset entry for each derived class that is laid out at the same
// offset as the least derived base.
for (unsigned I = Info.PathToIntroducingObject.size() - 1; I != 0; --I) {
const CXXRecordDecl *DerivedRD = Info.PathToIntroducingObject[I - 1];
const CXXRecordDecl *BaseRD = Info.PathToIntroducingObject[I];
const ASTRecordLayout &Layout =
getContext().getASTRecordLayout(DerivedRD);
CharUnits Offset;
auto VBI = Layout.getVBaseOffsetsMap().find(BaseRD);
if (VBI == Layout.getVBaseOffsetsMap().end())
Offset = Layout.getBaseClassOffset(BaseRD);
else
Offset = VBI->second.VBaseOffset;
if (!Offset.isZero())
return;
CGM.AddVTableTypeMetadata(VTable, AddressPoint, DerivedRD);
}
// Finally do the same for the most derived class.
if (Info.FullOffsetInMDC.isZero())
CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
}
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
const VPtrInfoVector &VFPtrs = VFTContext.getVFPtrOffsets(RD);
for (const std::unique_ptr<VPtrInfo>& Info : VFPtrs) {
llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC);
if (VTable->hasInitializer())
continue;
const VTableLayout &VTLayout =
VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);
llvm::Constant *RTTI = nullptr;
if (any_of(VTLayout.vtable_components(),
[](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
RTTI = getMSCompleteObjectLocator(RD, *Info);
ConstantInitBuilder Builder(CGM);
auto Components = Builder.beginStruct();
CGVT.createVTableInitializer(Components, VTLayout, RTTI);
Components.finishAndSetAsInitializer(VTable);
emitVTableTypeMetadata(*Info, RD, VTable);
}
}
bool MicrosoftCXXABI::isVirtualOffsetNeededForVTableField(
CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) {
return Vptr.NearestVBase != nullptr;
}
llvm::Value *MicrosoftCXXABI::getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
llvm::Constant *VTableAddressPoint = getVTableAddressPoint(Base, VTableClass);
if (!VTableAddressPoint) {
assert(Base.getBase()->getNumVBases() &&
!getContext().getASTRecordLayout(Base.getBase()).hasOwnVFPtr());
}
return VTableAddressPoint;
}
static void mangleVFTableName(MicrosoftMangleContext &MangleContext,
const CXXRecordDecl *RD, const VPtrInfo &VFPtr,
SmallString<256> &Name) {
llvm::raw_svector_ostream Out(Name);
MangleContext.mangleCXXVFTable(RD, VFPtr.MangledPath, Out);
}
llvm::Constant *
MicrosoftCXXABI::getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) {
(void)getAddrOfVTable(VTableClass, Base.getBaseOffset());
VFTableIdTy ID(VTableClass, Base.getBaseOffset());
return VFTablesMap[ID];
}
llvm::Constant *MicrosoftCXXABI::getVTableAddressPointForConstExpr(
BaseSubobject Base, const CXXRecordDecl *VTableClass) {
llvm::Constant *VFTable = getVTableAddressPoint(Base, VTableClass);
assert(VFTable && "Couldn't find a vftable for the given base?");
return VFTable;
}
llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) {
// getAddrOfVTable may return 0 if asked to get an address of a vtable which
// shouldn't be used in the given record type. We want to cache this result in
// VFTablesMap, thus a simple zero check is not sufficient.
VFTableIdTy ID(RD, VPtrOffset);
VTablesMapTy::iterator I;
bool Inserted;
std::tie(I, Inserted) = VTablesMap.insert(std::make_pair(ID, nullptr));
if (!Inserted)
return I->second;
llvm::GlobalVariable *&VTable = I->second;
MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
const VPtrInfoVector &VFPtrs = VTContext.getVFPtrOffsets(RD);
if (DeferredVFTables.insert(RD).second) {
// We haven't processed this record type before.
// Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
#ifndef NDEBUG
// Create all the vftables at once in order to make sure each vftable has
// a unique mangled name.
llvm::StringSet<> ObservedMangledNames;
for (size_t J = 0, F = VFPtrs.size(); J != F; ++J) {
SmallString<256> Name;
mangleVFTableName(getMangleContext(), RD, *VFPtrs[J], Name);
if (!ObservedMangledNames.insert(Name.str()).second)
llvm_unreachable("Already saw this mangling before?");
}
#endif
}
const std::unique_ptr<VPtrInfo> *VFPtrI = std::find_if(
VFPtrs.begin(), VFPtrs.end(), [&](const std::unique_ptr<VPtrInfo>& VPI) {
return VPI->FullOffsetInMDC == VPtrOffset;
});
if (VFPtrI == VFPtrs.end()) {
VFTablesMap[ID] = nullptr;
return nullptr;
}
const std::unique_ptr<VPtrInfo> &VFPtr = *VFPtrI;
SmallString<256> VFTableName;
mangleVFTableName(getMangleContext(), RD, *VFPtr, VFTableName);
// Classes marked __declspec(dllimport) need vftables generated on the
// import-side in order to support features like constexpr. No other
// translation unit relies on the emission of the local vftable; translation
// units are expected to generate them as needed.
//
// Because of this unique behavior, we maintain this logic here instead of
// getVTableLinkage.
llvm::GlobalValue::LinkageTypes VFTableLinkage =
RD->hasAttr<DLLImportAttr>() ? llvm::GlobalValue::LinkOnceODRLinkage
: CGM.getVTableLinkage(RD);
bool VFTableComesFromAnotherTU =
llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) ||
llvm::GlobalValue::isExternalLinkage(VFTableLinkage);
bool VTableAliasIsRequired =
!VFTableComesFromAnotherTU && getContext().getLangOpts().RTTIData;
if (llvm::GlobalValue *VFTable =
CGM.getModule().getNamedGlobal(VFTableName)) {
VFTablesMap[ID] = VFTable;
VTable = VTableAliasIsRequired
? cast<llvm::GlobalVariable>(
cast<llvm::GlobalAlias>(VFTable)->getBaseObject())
: cast<llvm::GlobalVariable>(VFTable);
return VTable;
}
const VTableLayout &VTLayout =
VTContext.getVFTableLayout(RD, VFPtr->FullOffsetInMDC);
llvm::GlobalValue::LinkageTypes VTableLinkage =
VTableAliasIsRequired ? llvm::GlobalValue::PrivateLinkage : VFTableLinkage;
StringRef VTableName = VTableAliasIsRequired ? StringRef() : VFTableName.str();
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);
// Create a backing variable for the contents of VTable. The VTable may
// or may not include space for a pointer to RTTI data.
llvm::GlobalValue *VFTable;
VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType,
/*isConstant=*/true, VTableLinkage,
/*Initializer=*/nullptr, VTableName);
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
llvm::Comdat *C = nullptr;
if (!VFTableComesFromAnotherTU &&
(llvm::GlobalValue::isWeakForLinker(VFTableLinkage) ||
(llvm::GlobalValue::isLocalLinkage(VFTableLinkage) &&
VTableAliasIsRequired)))
C = CGM.getModule().getOrInsertComdat(VFTableName.str());
// Only insert a pointer into the VFTable for RTTI data if we are not
// importing it. We never reference the RTTI data directly so there is no
// need to make room for it.
if (VTableAliasIsRequired) {
llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, 1)};
// Create a GEP which points just after the first entry in the VFTable,
// this should be the location of the first virtual method.
llvm::Constant *VTableGEP = llvm::ConstantExpr::getInBoundsGetElementPtr(
VTable->getValueType(), VTable, GEPIndices);
if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage)) {
VFTableLinkage = llvm::GlobalValue::ExternalLinkage;
if (C)
C->setSelectionKind(llvm::Comdat::Largest);
}
VFTable = llvm::GlobalAlias::create(CGM.Int8PtrTy,
/*AddressSpace=*/0, VFTableLinkage,
VFTableName.str(), VTableGEP,
&CGM.getModule());
VFTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
} else {
// We don't need a GlobalAlias to be a symbol for the VTable if we won't
// be referencing any RTTI data.
// The GlobalVariable will end up being an appropriate definition of the
// VFTable.
VFTable = VTable;
}
if (C)
VTable->setComdat(C);
if (RD->hasAttr<DLLExportAttr>())
VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
VFTablesMap[ID] = VFTable;
return VTable;
}
CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
GD = GD.getCanonicalDecl();
CGBuilderTy &Builder = CGF.Builder;
Ty = Ty->getPointerTo()->getPointerTo();
Address VPtr =
adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty, MethodDecl->getParent());
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
MicrosoftVTableContext::MethodVFTableLocation ML =
VFTContext.getMethodVFTableLocation(GD);
// Compute the identity of the most derived class whose virtual table is
// located at the MethodVFTableLocation ML.
auto getObjectWithVPtr = [&] {
return llvm::find_if(VFTContext.getVFPtrOffsets(
ML.VBase ? ML.VBase : MethodDecl->getParent()),
[&](const std::unique_ptr<VPtrInfo> &Info) {
return Info->FullOffsetInMDC == ML.VFPtrOffset;
})
->get()
->ObjectWithVPtr;
};
llvm::Value *VFunc;
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
getObjectWithVPtr(), VTable,
ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
} else {
if (CGM.getCodeGenOpts().PrepareForLTO)
CGF.EmitTypeMetadataCodeForVCall(getObjectWithVPtr(), VTable, Loc);
llvm::Value *VFuncPtr =
Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
}
CGCallee Callee(MethodDecl, VFunc);
return Callee;
}
llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
Address This, const CXXMemberCallExpr *CE) {
assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
// We have only one destructor in the vftable but can get both behaviors
// by passing an implicit int parameter.
GlobalDecl GD(Dtor, Dtor_Deleting);
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, StructorType::Deleting);
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
CGCallee Callee = getVirtualFunctionPointer(
CGF, GD, This, Ty, CE ? CE->getLocStart() : SourceLocation());
ASTContext &Context = getContext();
llvm::Value *ImplicitParam = llvm::ConstantInt::get(
llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
DtorType == Dtor_Deleting);
This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
RValue RV =
CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam,
Context.IntTy, CE, StructorType::Deleting);
return RV.getScalarVal();
}
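// Editorial sketch: the two source forms funneled through the single vftable
// slot above, distinguished only by the implicit int argument.
//   delete p;   // virtual call, ImplicitParam == 1 -> deleting destructor
//   p->~T();    // virtual call, ImplicitParam == 0 -> complete destructor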
const VBTableGlobals &
MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) {
// At this layer, we can key the cache off of a single class, which is much
// easier than caching each vbtable individually.
llvm::DenseMap<const CXXRecordDecl*, VBTableGlobals>::iterator Entry;
bool Added;
std::tie(Entry, Added) =
VBTablesMap.insert(std::make_pair(RD, VBTableGlobals()));
VBTableGlobals &VBGlobals = Entry->second;
if (!Added)
return VBGlobals;
MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
VBGlobals.VBTables = &Context.enumerateVBTables(RD);
// Cache the globals for all vbtables so we don't have to recompute the
// mangled names.
llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
for (VPtrInfoVector::const_iterator I = VBGlobals.VBTables->begin(),
E = VBGlobals.VBTables->end();
I != E; ++I) {
VBGlobals.Globals.push_back(getAddrOfVBTable(**I, RD, Linkage));
}
return VBGlobals;
}
llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
const CXXMethodDecl *MD,
const MicrosoftVTableContext::MethodVFTableLocation &ML) {
assert(!isa<CXXConstructorDecl>(MD) && !isa<CXXDestructorDecl>(MD) &&
"can't form pointers to ctors or virtual dtors");
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
getMangleContext().mangleVirtualMemPtrThunk(MD, Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
// Create the llvm::Function.
const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSMemberPointerThunk(MD);
llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *ThunkFn =
llvm::Function::Create(ThunkTy, llvm::Function::ExternalLinkage,
ThunkName.str(), &CGM.getModule());
assert(ThunkFn->getName() == ThunkName && "name was uniqued!");
ThunkFn->setLinkage(MD->isExternallyVisible()
? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::InternalLinkage);
if (MD->isExternallyVisible())
ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn);
CGM.SetLLVMFunctionAttributesForDefinition(MD, ThunkFn);
// Add the "thunk" attribute so that LLVM knows that the return type is
// meaningless. These thunks can be used to call functions with differing
// return types, and the caller is required to cast the prototype
// appropriately to extract the correct value.
ThunkFn->addFnAttr("thunk");
// These thunks can be compared, so they are not unnamed.
ThunkFn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
// Start codegen.
CodeGenFunction CGF(CGM);
CGF.CurGD = GlobalDecl(MD);
CGF.CurFuncIsThunk = true;
// Build FunctionArgs, but only include the implicit 'this' parameter
// declaration.
FunctionArgList FunctionArgs;
buildThisParam(CGF, FunctionArgs);
// Start defining the function.
CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
FunctionArgs, MD->getLocation(), SourceLocation());
EmitThisParam(CGF);
// Load the vfptr and then callee from the vftable. The callee should have
// adjusted 'this' so that the vfptr is at offset zero.
llvm::Value *VTable = CGF.GetVTablePtr(
getThisAddress(CGF), ThunkTy->getPointerTo()->getPointerTo(), MD->getParent());
llvm::Value *VFuncPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
llvm::Value *Callee =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
CGF.EmitMustTailThunk(MD, getThisValue(CGF), Callee);
return ThunkFn;
}
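// Editorial sketch (hypothetical type): what the thunk built above stands in
// for. Taking the address of a virtual member function yields a pointer to
// the thunk rather than to any particular override:
//   struct S { virtual int f(); };
//   auto pmf = &S::f;   // pmf holds the thunk; (s->*pmf)() loads s's vfptr,
//                       // indexes slot ML.Index, and musttail-calls through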
void MicrosoftCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
const std::unique_ptr<VPtrInfo>& VBT = (*VBGlobals.VBTables)[I];
llvm::GlobalVariable *GV = VBGlobals.Globals[I];
if (GV->isDeclaration())
emitVBTableDefinition(*VBT, RD, GV);
}
}
llvm::GlobalVariable *
MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable::LinkageTypes Linkage) {
SmallString<256> OutName;
llvm::raw_svector_ostream Out(OutName);
getMangleContext().mangleCXXVBTable(RD, VBT.MangledPath, Out);
StringRef Name = OutName.str();
llvm::ArrayType *VBTableType =
llvm::ArrayType::get(CGM.IntTy, 1 + VBT.ObjectWithVPtr->getNumVBases());
assert(!CGM.getModule().getNamedGlobal(Name) &&
"vbtable with this name already exists: mangling bug?");
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
else if (RD->hasAttr<DLLExportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
if (!GV->hasExternalLinkage())
emitVBTableDefinition(VBT, RD, GV);
return GV;
}
void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT,
const CXXRecordDecl *RD,
llvm::GlobalVariable *GV) const {
const CXXRecordDecl *ObjectWithVPtr = VBT.ObjectWithVPtr;
assert(RD->getNumVBases() && ObjectWithVPtr->getNumVBases() &&
"should only emit vbtables for classes with vbtables");
const ASTRecordLayout &BaseLayout =
getContext().getASTRecordLayout(VBT.IntroducingObject);
const ASTRecordLayout &DerivedLayout = getContext().getASTRecordLayout(RD);
SmallVector<llvm::Constant *, 4> Offsets(1 + ObjectWithVPtr->getNumVBases(),
nullptr);
// The offset from ObjectWithVPtr's vbptr to itself always leads.
CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset();
Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity());
MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
for (const auto &I : ObjectWithVPtr->vbases()) {
const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase);
assert(!Offset.isNegative());
// Make it relative to the subobject vbptr.
CharUnits CompleteVBPtrOffset = VBT.NonVirtualOffset + VBPtrOffset;
if (VBT.getVBaseWithVPtr())
CompleteVBPtrOffset +=
DerivedLayout.getVBaseClassOffset(VBT.getVBaseWithVPtr());
Offset -= CompleteVBPtrOffset;
unsigned VBIndex = Context.getVBTableIndex(ObjectWithVPtr, VBase);
assert(Offsets[VBIndex] == nullptr && "The same vbindex seen twice?");
Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity());
}
assert(Offsets.size() ==
cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType())
->getElementType())->getNumElements());
llvm::ArrayType *VBTableType =
llvm::ArrayType::get(CGM.IntTy, Offsets.size());
llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
GV->setInitializer(Init);
if (RD->hasAttr<DLLImportAttr>())
GV->setLinkage(llvm::GlobalVariable::AvailableExternallyLinkage);
}
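// Editorial sketch (hypothetical layout): the i32 array emitted above for
//   struct A { int a; }; struct B { int b; };
//   struct C : virtual A, virtual B { int c; };
// is roughly { 0, offset-of-A-in-C - vbptr_offset,
//              offset-of-B-in-C - vbptr_offset }: entry 0 locates the vbptr
// relative to itself, and each later slot is a vbase's displacement from it.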
llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF,
Address This,
const ThisAdjustment &TA) {
if (TA.isEmpty())
return This.getPointer();
This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty);
llvm::Value *V;
if (TA.Virtual.isEmpty()) {
V = This.getPointer();
} else {
assert(TA.Virtual.Microsoft.VtordispOffset < 0);
// Adjust the this argument based on the vtordisp value.
Address VtorDispPtr =
CGF.Builder.CreateConstInBoundsByteGEP(This,
CharUnits::fromQuantity(TA.Virtual.Microsoft.VtordispOffset));
VtorDispPtr = CGF.Builder.CreateElementBitCast(VtorDispPtr, CGF.Int32Ty);
llvm::Value *VtorDisp = CGF.Builder.CreateLoad(VtorDispPtr, "vtordisp");
V = CGF.Builder.CreateGEP(This.getPointer(),
CGF.Builder.CreateNeg(VtorDisp));
// Unfortunately, having applied the vtordisp means that we no
// longer really have a known alignment for the vbptr step.
// We'll assume the vbptr is pointer-aligned.
if (TA.Virtual.Microsoft.VBPtrOffset) {
// If the final overrider is defined in a virtual base other than the one
// that holds the vfptr, we have to use a vtordispex thunk which looks up
// the vbtable of the derived class.
assert(TA.Virtual.Microsoft.VBPtrOffset > 0);
assert(TA.Virtual.Microsoft.VBOffsetOffset >= 0);
llvm::Value *VBPtr;
llvm::Value *VBaseOffset =
GetVBaseOffsetFromVBPtr(CGF, Address(V, CGF.getPointerAlign()),
-TA.Virtual.Microsoft.VBPtrOffset,
TA.Virtual.Microsoft.VBOffsetOffset, &VBPtr);
V = CGF.Builder.CreateInBoundsGEP(VBPtr, VBaseOffset);
}
}
if (TA.NonVirtual) {
// Non-virtual adjustment might result in a pointer outside the allocated
// object, e.g. if the final overrider class is laid out after the virtual
// base that declares a method in the most derived class.
V = CGF.Builder.CreateConstGEP1_32(V, TA.NonVirtual);
}
// Don't need to bitcast back; the call CodeGen will handle this.
return V;
}
llvm::Value *
MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) {
if (RA.isEmpty())
return Ret.getPointer();
auto OrigTy = Ret.getType();
Ret = CGF.Builder.CreateElementBitCast(Ret, CGF.Int8Ty);
llvm::Value *V = Ret.getPointer();
if (RA.Virtual.Microsoft.VBIndex) {
assert(RA.Virtual.Microsoft.VBIndex > 0);
int32_t IntSize = CGF.getIntSize().getQuantity();
llvm::Value *VBPtr;
llvm::Value *VBaseOffset =
GetVBaseOffsetFromVBPtr(CGF, Ret, RA.Virtual.Microsoft.VBPtrOffset,
IntSize * RA.Virtual.Microsoft.VBIndex, &VBPtr);
V = CGF.Builder.CreateInBoundsGEP(VBPtr, VBaseOffset);
}
if (RA.NonVirtual)
V = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, V, RA.NonVirtual);
// Cast back to the original type.
return CGF.Builder.CreateBitCast(V, OrigTy);
}
bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) {
// Microsoft seems to completely ignore the possibility of a
// two-argument usual deallocation function.
return elementType.isDestructedType();
}
bool MicrosoftCXXABI::requiresArrayCookie(const CXXNewExpr *expr) {
// Microsoft seems to completely ignore the possibility of a
// two-argument usual deallocation function.
return expr->getAllocatedType().isDestructedType();
}
CharUnits MicrosoftCXXABI::getArrayCookieSizeImpl(QualType type) {
// The array cookie is always a size_t; we then pad that out to the
// alignment of the element type.
ASTContext &Ctx = getContext();
return std::max(Ctx.getTypeSizeInChars(Ctx.getSizeType()),
Ctx.getTypeAlignInChars(type));
}
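// Editorial sketch: the max() above on a 64-bit target (sizeof(size_t) == 8).
//   struct T { ~T(); char c; };              // cookie = 8 bytes
//   struct alignas(16) U { ~U(); char c; };  // cookie = 16 bytes, padded out
//                                            // to the element alignment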
llvm::Value *MicrosoftCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
Address numElementsPtr =
CGF.Builder.CreateElementBitCast(allocPtr, CGF.SizeTy);
return CGF.Builder.CreateLoad(numElementsPtr);
}
Address MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address newPtr,
llvm::Value *numElements,
const CXXNewExpr *expr,
QualType elementType) {
assert(requiresArrayCookie(expr));
// The size of the cookie.
CharUnits cookieSize = getArrayCookieSizeImpl(elementType);
// Compute an offset to the cookie.
Address cookiePtr = newPtr;
// Write the number of elements into the appropriate slot.
Address numElementsPtr
= CGF.Builder.CreateElementBitCast(cookiePtr, CGF.SizeTy);
CGF.Builder.CreateStore(numElements, numElementsPtr);
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
return CGF.Builder.CreateConstInBoundsByteGEP(newPtr, cookieSize);
}
static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
llvm::Constant *Dtor,
llvm::Constant *Addr) {
// Create a function which calls the destructor.
llvm::Constant *DtorStub = CGF.createAtExitStub(VD, Dtor, Addr);
// extern "C" int __tlregdtor(void (*f)(void));
llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction(
TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor))
TLRegDtorFn->setDoesNotThrow();
CGF.EmitNounwindRuntimeCall(TLRegDtor, DtorStub);
}
void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Dtor,
llvm::Constant *Addr) {
if (D.getTLSKind())
return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
// The default behavior is to use atexit.
CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr);
}
void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) {
if (CXXThreadLocalInits.empty())
return;
CGM.AppendLinkerOptions(CGM.getTarget().getTriple().getArch() ==
llvm::Triple::x86
? "/include:___dyn_tls_init@12"
: "/include:__dyn_tls_init");
// This will create a GV in the .CRT$XDU section. It will point to our
// initialization function. The CRT will call all of these function
// pointers at start-up time and, eventually, at thread-creation time.
auto AddToXDU = [&CGM](llvm::Function *InitFunc) {
llvm::GlobalVariable *InitFuncPtr = new llvm::GlobalVariable(
CGM.getModule(), InitFunc->getType(), /*IsConstant=*/true,
llvm::GlobalVariable::InternalLinkage, InitFunc,
Twine(InitFunc->getName(), "$initializer$"));
InitFuncPtr->setSection(".CRT$XDU");
// This variable has discardable linkage; we have to add it to @llvm.used to
// ensure it won't get discarded.
CGM.addUsedGlobal(InitFuncPtr);
return InitFuncPtr;
};
std::vector<llvm::Function *> NonComdatInits;
for (size_t I = 0, E = CXXThreadLocalInitVars.size(); I != E; ++I) {
llvm::GlobalVariable *GV = cast<llvm::GlobalVariable>(
CGM.GetGlobalValue(CGM.getMangledName(CXXThreadLocalInitVars[I])));
llvm::Function *F = CXXThreadLocalInits[I];
// If the GV is already in a comdat group, then we have to join it.
if (llvm::Comdat *C = GV->getComdat())
AddToXDU(F)->setComdat(C);
else
NonComdatInits.push_back(F);
}
if (!NonComdatInits.empty()) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
llvm::Function *InitFunc = CGM.CreateGlobalInitOrDestructFunction(
FTy, "__tls_init", CGM.getTypes().arrangeNullaryFunction(),
SourceLocation(), /*TLS=*/true);
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, NonComdatInits);
AddToXDU(InitFunc);
}
}
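// Illustrative IR produced by AddToXDU above (a hedged sketch; the exact
// @llvm.used shape is approximate):
//   @"__tls_init$initializer$" = internal constant void ()* @__tls_init,
//       section ".CRT$XDU"
//   @llvm.used = appending global [1 x i8*]
//       [i8* bitcast (void ()** @"__tls_init$initializer$" to i8*)]
// The CRT walks the .CRT$XD* function-pointer sections at start-up and at
// thread creation and calls each entry.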
LValue MicrosoftCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD,
QualType LValType) {
CGF.CGM.ErrorUnsupported(VD, "thread wrappers");
return LValue();
}
static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) {
StringRef VarName("_Init_thread_epoch");
CharUnits Align = CGM.getIntAlign();
if (auto *GV = CGM.getModule().getNamedGlobal(VarName))
return ConstantAddress(GV, Align);
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), CGM.IntTy,
/*Constant=*/false, llvm::GlobalVariable::ExternalLinkage,
/*Initializer=*/nullptr, VarName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
GV->setAlignment(Align.getQuantity());
return ConstantAddress(GV, Align);
}
static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_header",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_footer",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_abort",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
namespace {
struct ResetGuardBit final : EHScopeStack::Cleanup {
Address Guard;
unsigned GuardNum;
ResetGuardBit(Address Guard, unsigned GuardNum)
: Guard(Guard), GuardNum(GuardNum) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Reset the bit in the mask so that the static variable may be
// reinitialized.
CGBuilderTy &Builder = CGF.Builder;
llvm::LoadInst *LI = Builder.CreateLoad(Guard);
llvm::ConstantInt *Mask =
llvm::ConstantInt::get(CGF.IntTy, ~(1ULL << GuardNum));
Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard);
}
};
struct CallInitThreadAbort final : EHScopeStack::Cleanup {
llvm::Value *Guard;
CallInitThreadAbort(Address Guard) : Guard(Guard.getPointer()) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Calling _Init_thread_abort will reset the guard's state.
CGF.EmitNounwindRuntimeCall(getInitThreadAbortFn(CGF.CGM), Guard);
}
};
}
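// Illustrative source that exercises both cleanups above (hedged; may_throw
// is hypothetical):
//   struct T { T() { may_throw(); } };
//   void f() { static T t; }
// If T::T() throws, the guard bit (or the _Init_thread state) is rolled back
// so a later call to f can retry the initialization.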
void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *GV,
bool PerformInit) {
// MSVC only uses guards for static locals.
if (!D.isStaticLocal()) {
assert(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage());
// GlobalOpt is allowed to discard the initializer, so use linkonce_odr.
llvm::Function *F = CGF.CurFn;
F->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
F->setComdat(CGM.getModule().getOrInsertComdat(F->getName()));
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
return;
}
bool ThreadlocalStatic = D.getTLSKind();
bool ThreadsafeStatic = getContext().getLangOpts().ThreadsafeStatics;
// Thread-safe static variables which aren't thread-specific have a
// per-variable guard.
bool HasPerVariableGuard = ThreadsafeStatic && !ThreadlocalStatic;
CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *GuardTy = CGF.Int32Ty;
llvm::ConstantInt *Zero = llvm::ConstantInt::get(GuardTy, 0);
CharUnits GuardAlign = CharUnits::fromQuantity(4);
// Get the guard variable for this function if we have one already.
GuardInfo *GI = nullptr;
if (ThreadlocalStatic)
GI = &ThreadLocalGuardVariableMap[D.getDeclContext()];
else if (!ThreadsafeStatic)
GI = &GuardVariableMap[D.getDeclContext()];
llvm::GlobalVariable *GuardVar = GI ? GI->Guard : nullptr;
unsigned GuardNum;
if (D.isExternallyVisible()) {
// Externally visible variables have to be numbered in Sema to properly
// handle unreachable VarDecls.
GuardNum = getContext().getStaticLocalNumber(&D);
assert(GuardNum > 0);
GuardNum--;
} else if (HasPerVariableGuard) {
GuardNum = ThreadSafeGuardNumMap[D.getDeclContext()]++;
} else {
// Non-externally visible variables are numbered here in CodeGen.
GuardNum = GI->BitIndex++;
}
if (!HasPerVariableGuard && GuardNum >= 32) {
if (D.isExternallyVisible())
ErrorUnsupportedABI(CGF, "more than 32 guarded initializations");
GuardNum %= 32;
GuardVar = nullptr;
}
if (!GuardVar) {
// Mangle the name for the guard.
SmallString<256> GuardName;
{
llvm::raw_svector_ostream Out(GuardName);
if (HasPerVariableGuard)
getMangleContext().mangleThreadSafeStaticGuardVariable(&D, GuardNum,
Out);
else
getMangleContext().mangleStaticGuardVariable(&D, Out);
}
// Create the guard variable with a zero-initializer. Just absorb linkage,
// visibility and dll storage class from the guarded variable.
GuardVar =
new llvm::GlobalVariable(CGM.getModule(), GuardTy, /*isConstant=*/false,
GV->getLinkage(), Zero, GuardName.str());
GuardVar->setVisibility(GV->getVisibility());
GuardVar->setDLLStorageClass(GV->getDLLStorageClass());
GuardVar->setAlignment(GuardAlign.getQuantity());
if (GuardVar->isWeakForLinker())
GuardVar->setComdat(
CGM.getModule().getOrInsertComdat(GuardVar->getName()));
if (D.getTLSKind())
GuardVar->setThreadLocal(true);
if (GI && !HasPerVariableGuard)
GI->Guard = GuardVar;
}
ConstantAddress GuardAddr(GuardVar, GuardAlign);
assert(GuardVar->getLinkage() == GV->getLinkage() &&
"static local from the same function had different linkage");
if (!HasPerVariableGuard) {
// Pseudo code for the test:
// if (!(GuardVar & MyGuardBit)) {
// GuardVar |= MyGuardBit;
// ... initialize the object ...;
// }
// Test our bit from the guard variable.
llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum);
llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr);
llvm::Value *IsInitialized =
Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
// Set our bit in the guard variable, emit the initializer, and add a global
// destructor if appropriate.
CGF.EmitBlock(InitBlock);
Builder.CreateStore(Builder.CreateOr(LI, Bit), GuardAddr);
CGF.EHStack.pushCleanup<ResetGuardBit>(EHCleanup, GuardAddr, GuardNum);
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
CGF.PopCleanupBlock();
Builder.CreateBr(EndBlock);
// Continue.
CGF.EmitBlock(EndBlock);
} else {
// Pseudo code for the test:
// if (TSS > _Init_thread_epoch) {
// _Init_thread_header(&TSS);
// if (TSS == -1) {
// ... initialize the object ...;
// _Init_thread_footer(&TSS);
// }
// }
//
// The algorithm is almost identical to the one in the appendix of N2325.
// This BasicBlock determines whether or not we have any work to do.
llvm::LoadInst *FirstGuardLoad = Builder.CreateLoad(GuardAddr);
FirstGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
llvm::LoadInst *InitThreadEpoch =
Builder.CreateLoad(getInitThreadEpochPtr(CGM));
llvm::Value *IsUninitialized =
Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch);
llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock);
// This BasicBlock attempts to determine whether or not this thread is
// responsible for doing the initialization.
CGF.EmitBlock(AttemptInitBlock);
CGF.EmitNounwindRuntimeCall(getInitThreadHeaderFn(CGM),
GuardAddr.getPointer());
llvm::LoadInst *SecondGuardLoad = Builder.CreateLoad(GuardAddr);
SecondGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
llvm::Value *ShouldDoInit =
Builder.CreateICmpEQ(SecondGuardLoad, getAllOnesInt());
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
Builder.CreateCondBr(ShouldDoInit, InitBlock, EndBlock);
// Ok, we ended up getting selected as the initializing thread.
CGF.EmitBlock(InitBlock);
CGF.EHStack.pushCleanup<CallInitThreadAbort>(EHCleanup, GuardAddr);
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
CGF.PopCleanupBlock();
CGF.EmitNounwindRuntimeCall(getInitThreadFooterFn(CGM),
GuardAddr.getPointer());
Builder.CreateBr(EndBlock);
CGF.EmitBlock(EndBlock);
}
}
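// Illustrative consequence of the shared 32-bit mask (hedged; g is
// hypothetical):
//   void f() { static int a = g(); static int b = g(); }
// With thread-safe statics disabled, `a` tests and sets bit 0 and `b` bit 1
// of a single guard word for f; a 33rd guarded local starts a fresh guard
// word if internal, and is a hard error if externally visible.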
bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
// Null-ness for function memptrs only depends on the first field, which is
// the function pointer. The rest don't matter, so we can zero initialize.
if (MPT->isMemberFunctionPointer())
return true;
// The virtual base adjustment field is always -1 for null, so if we have one
// we can't zero initialize. The field offset is sometimes also -1 if 0 is a
// valid field offset.
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
return (!MSInheritanceAttr::hasVBTableOffsetField(Inheritance) &&
RD->nullFieldOffsetIsZero());
}
llvm::Type *
MicrosoftCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
llvm::SmallVector<llvm::Type *, 4> fields;
if (MPT->isMemberFunctionPointer())
fields.push_back(CGM.VoidPtrTy); // FunctionPointerOrVirtualThunk
else
fields.push_back(CGM.IntTy); // FieldOffset
if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
Inheritance))
fields.push_back(CGM.IntTy);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(CGM.IntTy);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(CGM.IntTy); // VirtualBaseAdjustmentOffset
if (fields.size() == 1)
return fields[0];
return llvm::StructType::get(CGM.getLLVMContext(), fields);
}
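// Illustrative lowerings under the different inheritance models (a hedged
// sketch; exact widths follow CGM.IntTy):
//   struct S { void f(); };           // single:   void (S::*)() -> i8*
//   struct A {};  struct B {};
//   struct M : A, B { void f(); };    // multiple: void (M::*)() -> { i8*, i32 }
//   struct V : virtual A { int x; };  // virtual:  int V::*      -> { i32, i32 }
// A member function pointer in the unspecified model carries all four fields.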
void MicrosoftCXXABI::
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields) {
assert(fields.empty());
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
if (MPT->isMemberFunctionPointer()) {
// FunctionPointerOrVirtualThunk
fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy));
} else {
if (RD->nullFieldOffsetIsZero())
fields.push_back(getZeroInt()); // FieldOffset
else
fields.push_back(getAllOnesInt()); // FieldOffset
}
if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
Inheritance))
fields.push_back(getZeroInt());
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(getZeroInt());
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(getAllOnesInt());
}
llvm::Constant *
MicrosoftCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
llvm::SmallVector<llvm::Constant *, 4> fields;
GetNullMemberPointerFields(MPT, fields);
if (fields.size() == 1)
return fields[0];
llvm::Constant *Res = llvm::ConstantStruct::getAnon(fields);
assert(Res->getType() == ConvertMemberPointerType(MPT));
return Res;
}
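// Illustrative null encodings implied by the fields above (hedged):
//   struct A { int x; };   // int A::* : null is -1; x really lives at offset 0
//   struct P { virtual void f(); int x; };
//                          // int P::* : null can be 0; offset 0 holds the
//                          // vfptr, never a field
// Member function pointers are always null-tested via a null first field.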
llvm::Constant *
MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField,
bool IsMemberFunction,
const CXXRecordDecl *RD,
CharUnits NonVirtualBaseAdjustment,
unsigned VBTableIndex) {
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Single-inheritance class member pointers are represented as scalars instead
// of aggregates.
if (MSInheritanceAttr::hasOnlyOneField(IsMemberFunction, Inheritance))
return FirstField;
llvm::SmallVector<llvm::Constant *, 4> fields;
fields.push_back(FirstField);
if (MSInheritanceAttr::hasNVOffsetField(IsMemberFunction, Inheritance))
fields.push_back(llvm::ConstantInt::get(
CGM.IntTy, NonVirtualBaseAdjustment.getQuantity()));
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) {
CharUnits Offs = CharUnits::Zero();
if (VBTableIndex)
Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity()));
}
// The rest of the fields are adjusted by conversions to a more derived class.
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, VBTableIndex));
return llvm::ConstantStruct::getAnon(fields);
}
llvm::Constant *
MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
if (RD->getMSInheritanceModel() ==
MSInheritanceAttr::Keyword_virtual_inheritance)
offset -= getContext().getOffsetOfBaseWithVBPtr(RD);
llvm::Constant *FirstField =
llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity());
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD,
CharUnits::Zero(), /*VBTableIndex=*/0);
}
llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const APValue &MP,
QualType MPType) {
const MemberPointerType *DstTy = MPType->castAs<MemberPointerType>();
const ValueDecl *MPD = MP.getMemberPointerDecl();
if (!MPD)
return EmitNullMemberPointer(DstTy);
ASTContext &Ctx = getContext();
ArrayRef<const CXXRecordDecl *> MemberPointerPath = MP.getMemberPointerPath();
llvm::Constant *C;
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD)) {
C = EmitMemberFunctionPointer(MD);
} else {
CharUnits FieldOffset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(MPD));
C = EmitMemberDataPointer(DstTy, FieldOffset);
}
if (!MemberPointerPath.empty()) {
const CXXRecordDecl *SrcRD = cast<CXXRecordDecl>(MPD->getDeclContext());
const Type *SrcRecTy = Ctx.getTypeDeclType(SrcRD).getTypePtr();
const MemberPointerType *SrcTy =
Ctx.getMemberPointerType(DstTy->getPointeeType(), SrcRecTy)
->castAs<MemberPointerType>();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
SmallVector<const CXXBaseSpecifier *, 4> DerivedToBasePath;
const CXXRecordDecl *PrevRD = SrcRD;
for (const CXXRecordDecl *PathElem : MemberPointerPath) {
const CXXRecordDecl *Base = nullptr;
const CXXRecordDecl *Derived = nullptr;
if (DerivedMember) {
Base = PathElem;
Derived = PrevRD;
} else {
Base = PrevRD;
Derived = PathElem;
}
for (const CXXBaseSpecifier &BS : Derived->bases())
if (BS.getType()->getAsCXXRecordDecl()->getCanonicalDecl() ==
Base->getCanonicalDecl())
DerivedToBasePath.push_back(&BS);
PrevRD = PathElem;
}
assert(DerivedToBasePath.size() == MemberPointerPath.size());
CastKind CK = DerivedMember ? CK_DerivedToBaseMemberPointer
: CK_BaseToDerivedMemberPointer;
C = EmitMemberPointerConversion(SrcTy, DstTy, CK, DerivedToBasePath.begin(),
DerivedToBasePath.end(), C);
}
return C;
}
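// Illustrative path recovery (hedged): for
//   struct B { int x; };  struct D : B {};
//   constexpr int D::*p = &B::x;
// the APValue's MemberPointerPath records the derivation step from B to D;
// the loop above rediscovers the matching CXXBaseSpecifier so the constant
// can be converted with CK_BaseToDerivedMemberPointer.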
llvm::Constant *
MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
assert(MD->isInstance() && "Member function must not be static!");
MD = MD->getCanonicalDecl();
CharUnits NonVirtualBaseAdjustment = CharUnits::Zero();
const CXXRecordDecl *RD = MD->getParent()->getMostRecentDecl();
CodeGenTypes &Types = CGM.getTypes();
unsigned VBTableIndex = 0;
llvm::Constant *FirstField;
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (!MD->isVirtual()) {
llvm::Type *Ty;
// Check whether the function has a computable LLVM signature.
if (Types.isFuncTypeConvertible(FPT)) {
// The function has a computable LLVM signature; use the correct type.
Ty = Types.GetFunctionType(Types.arrangeCXXMethodDeclaration(MD));
} else {
// Use an arbitrary non-function type to tell GetAddrOfFunction that the
// function type is incomplete.
Ty = CGM.PtrDiffTy;
}
FirstField = CGM.GetAddrOfFunction(MD, Ty);
} else {
auto &VTableContext = CGM.getMicrosoftVTableContext();
MicrosoftVTableContext::MethodVFTableLocation ML =
VTableContext.getMethodVFTableLocation(MD);
FirstField = EmitVirtualMemPtrThunk(MD, ML);
// Include the vfptr adjustment if the method is in a non-primary vftable.
NonVirtualBaseAdjustment += ML.VFPtrOffset;
if (ML.VBase)
VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
}
if (VBTableIndex == 0 &&
RD->getMSInheritanceModel() ==
MSInheritanceAttr::Keyword_virtual_inheritance)
NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD);
// The rest of the fields are common with data member pointers.
FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD,
NonVirtualBaseAdjustment, VBTableIndex);
}
/// Member pointers are the same if they're either bitwise identical *or* both
/// null. Null-ness for function members is determined by the first field,
/// while for data member pointers we must compare all fields.
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
CGBuilderTy &Builder = CGF.Builder;
// Handle != comparisons by switching the sense of all boolean operations.
llvm::ICmpInst::Predicate Eq;
llvm::Instruction::BinaryOps And, Or;
if (Inequality) {
Eq = llvm::ICmpInst::ICMP_NE;
And = llvm::Instruction::Or;
Or = llvm::Instruction::And;
} else {
Eq = llvm::ICmpInst::ICMP_EQ;
And = llvm::Instruction::And;
Or = llvm::Instruction::Or;
}
// If this is a single field member pointer (single inheritance), this is a
// single icmp.
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
if (MSInheritanceAttr::hasOnlyOneField(MPT->isMemberFunctionPointer(),
Inheritance))
return Builder.CreateICmp(Eq, L, R);
// Compare the first field.
llvm::Value *L0 = Builder.CreateExtractValue(L, 0, "lhs.0");
llvm::Value *R0 = Builder.CreateExtractValue(R, 0, "rhs.0");
llvm::Value *Cmp0 = Builder.CreateICmp(Eq, L0, R0, "memptr.cmp.first");
// Compare everything other than the first field.
llvm::Value *Res = nullptr;
llvm::StructType *LType = cast<llvm::StructType>(L->getType());
for (unsigned I = 1, E = LType->getNumElements(); I != E; ++I) {
llvm::Value *LF = Builder.CreateExtractValue(L, I);
llvm::Value *RF = Builder.CreateExtractValue(R, I);
llvm::Value *Cmp = Builder.CreateICmp(Eq, LF, RF, "memptr.cmp.rest");
if (Res)
Res = Builder.CreateBinOp(And, Res, Cmp);
else
Res = Cmp;
}
// Check if the first field is 0 if this is a function pointer.
if (MPT->isMemberFunctionPointer()) {
// (l1 == r1 && ...) || l0 == 0
llvm::Value *Zero = llvm::Constant::getNullValue(L0->getType());
llvm::Value *IsZero = Builder.CreateICmp(Eq, L0, Zero, "memptr.cmp.iszero");
Res = Builder.CreateBinOp(Or, Res, IsZero);
}
// Combine with the comparison of the first field, which must always be true
// for the overall comparison to succeed.
return Builder.CreateBinOp(And, Res, Cmp0, "memptr.cmp");
}
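// Net effect of the combination above, written out (a hedged reading):
//   equal(L, R) = (l0 == r0) && (rest_equal || l0 == 0)   // function memptrs
//   equal(L, R) = (l0 == r0) && rest_equal                // data memptrs
// When both first fields are null, the remaining (possibly garbage) fields of
// a function memptr are ignored; for != every predicate and connective flips.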
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
llvm::SmallVector<llvm::Constant *, 4> fields;
// We only need one field for member functions.
if (MPT->isMemberFunctionPointer())
fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy));
else
GetNullMemberPointerFields(MPT, fields);
assert(!fields.empty());
llvm::Value *FirstField = MemPtr;
if (MemPtr->getType()->isStructTy())
FirstField = Builder.CreateExtractValue(MemPtr, 0);
llvm::Value *Res = Builder.CreateICmpNE(FirstField, fields[0], "memptr.cmp0");
// For function member pointers, we only need to test the function pointer
// field. The other fields, if any, can be garbage.
if (MPT->isMemberFunctionPointer())
return Res;
// Otherwise, emit a series of compares and combine the results.
for (int I = 1, E = fields.size(); I < E; ++I) {
llvm::Value *Field = Builder.CreateExtractValue(MemPtr, I);
llvm::Value *Next = Builder.CreateICmpNE(Field, fields[I], "memptr.cmp");
Res = Builder.CreateOr(Res, Next, "memptr.tobool");
}
return Res;
}
bool MicrosoftCXXABI::MemberPointerConstantIsNull(const MemberPointerType *MPT,
llvm::Constant *Val) {
// Function pointers are null if the pointer in the first field is null.
if (MPT->isMemberFunctionPointer()) {
llvm::Constant *FirstField = Val->getType()->isStructTy() ?
Val->getAggregateElement(0U) : Val;
return FirstField->isNullValue();
}
// If it's not a function pointer and it's zero initializable, we can easily
// check zero.
if (isZeroInitializable(MPT) && Val->isNullValue())
return true;
// Otherwise, break down all the fields for comparison. Hopefully these
// little Constants are reused, while a big null struct might not be.
llvm::SmallVector<llvm::Constant *, 4> Fields;
GetNullMemberPointerFields(MPT, Fields);
if (Fields.size() == 1) {
assert(Val->getType()->isIntegerTy());
return Val == Fields[0];
}
unsigned I, E;
for (I = 0, E = Fields.size(); I != E; ++I) {
if (Val->getAggregateElement(I) != Fields[I])
break;
}
return I == E;
}
llvm::Value *
MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address This,
llvm::Value *VBPtrOffset,
llvm::Value *VBTableOffset,
llvm::Value **VBPtrOut) {
CGBuilderTy &Builder = CGF.Builder;
// Load the vbtable pointer from the vbptr in the instance.
This = Builder.CreateElementBitCast(This, CGM.Int8Ty);
llvm::Value *VBPtr =
Builder.CreateInBoundsGEP(This.getPointer(), VBPtrOffset, "vbptr");
if (VBPtrOut) *VBPtrOut = VBPtr;
VBPtr = Builder.CreateBitCast(VBPtr,
CGM.Int32Ty->getPointerTo(0)->getPointerTo(This.getAddressSpace()));
CharUnits VBPtrAlign;
if (auto CI = dyn_cast<llvm::ConstantInt>(VBPtrOffset)) {
VBPtrAlign = This.getAlignment().alignmentAtOffset(
CharUnits::fromQuantity(CI->getSExtValue()));
} else {
VBPtrAlign = CGF.getPointerAlign();
}
llvm::Value *VBTable = Builder.CreateAlignedLoad(VBPtr, VBPtrAlign, "vbtable");
// Translate from byte offset to table index. It improves analyzability.
llvm::Value *VBTableIndex = Builder.CreateAShr(
VBTableOffset, llvm::ConstantInt::get(VBTableOffset->getType(), 2),
"vbtindex", /*isExact=*/true);
// Load an i32 offset from the vb-table.
llvm::Value *VBaseOffs = Builder.CreateInBoundsGEP(VBTable, VBTableIndex);
VBaseOffs = Builder.CreateBitCast(VBaseOffs, CGM.Int32Ty->getPointerTo(0));
return Builder.CreateAlignedLoad(VBaseOffs, CharUnits::fromQuantity(4),
"vbase_offs");
}
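// Conceptual C model of the load sequence above (a hedged sketch, not the
// emitted IR):
//   char *vbptr      = (char *)this_ + VBPtrOffset;   // locate the vbptr
//   int  *vbtable    = *(int **)vbptr;                // load the vbtable ptr
//   int   vbase_offs = vbtable[VBTableOffset / 4];    // index i32 entries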
// Returns an adjusted base cast to i8*, since we do more address arithmetic on
// it.
llvm::Value *MicrosoftCXXABI::AdjustVirtualBase(
CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD,
Address Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) {
CGBuilderTy &Builder = CGF.Builder;
Base = Builder.CreateElementBitCast(Base, CGM.Int8Ty);
llvm::BasicBlock *OriginalBB = nullptr;
llvm::BasicBlock *SkipAdjustBB = nullptr;
llvm::BasicBlock *VBaseAdjustBB = nullptr;
// In the unspecified inheritance model, there might not be a vbtable at all,
// in which case we need to skip the virtual base lookup. If there is a
// vbtable, the first entry is a no-op entry that gives back the original
// base, so look for a virtual base adjustment offset of zero.
if (VBPtrOffset) {
OriginalBB = Builder.GetInsertBlock();
VBaseAdjustBB = CGF.createBasicBlock("memptr.vadjust");
SkipAdjustBB = CGF.createBasicBlock("memptr.skip_vadjust");
llvm::Value *IsVirtual =
Builder.CreateICmpNE(VBTableOffset, getZeroInt(),
"memptr.is_vbase");
Builder.CreateCondBr(IsVirtual, VBaseAdjustBB, SkipAdjustBB);
CGF.EmitBlock(VBaseAdjustBB);
}
// If we weren't given a dynamic vbptr offset, RD should be complete and we'll
// know the vbptr offset.
if (!VBPtrOffset) {
CharUnits offs = CharUnits::Zero();
if (!RD->hasDefinition()) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"member pointer representation requires a "
"complete class type for %0 to perform this expression");
Diags.Report(E->getExprLoc(), DiagID) << RD << E->getSourceRange();
} else if (RD->getNumVBases())
offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
VBPtrOffset = llvm::ConstantInt::get(CGM.IntTy, offs.getQuantity());
}
llvm::Value *VBPtr = nullptr;
llvm::Value *VBaseOffs =
GetVBaseOffsetFromVBPtr(CGF, Base, VBPtrOffset, VBTableOffset, &VBPtr);
llvm::Value *AdjustedBase = Builder.CreateInBoundsGEP(VBPtr, VBaseOffs);
// Merge control flow with the case where we didn't have to adjust.
if (VBaseAdjustBB) {
Builder.CreateBr(SkipAdjustBB);
CGF.EmitBlock(SkipAdjustBB);
llvm::PHINode *Phi = Builder.CreatePHI(CGM.Int8PtrTy, 2, "memptr.base");
Phi->addIncoming(Base.getPointer(), OriginalBB);
Phi->addIncoming(AdjustedBase, VBaseAdjustBB);
return Phi;
}
return AdjustedBase;
}
llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MPT->isMemberDataPointer());
unsigned AS = Base.getAddressSpace();
llvm::Type *PType =
CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS);
CGBuilderTy &Builder = CGF.Builder;
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
llvm::Value *FieldOffset = MemPtr;
llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
llvm::Value *VBPtrOffset = nullptr;
if (MemPtr->getType()->isStructTy()) {
// We need to extract values.
unsigned I = 0;
FieldOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
llvm::Value *Addr;
if (VirtualBaseAdjustmentOffset) {
Addr = AdjustVirtualBase(CGF, E, RD, Base, VirtualBaseAdjustmentOffset,
VBPtrOffset);
} else {
Addr = Base.getPointer();
}
// Cast to char*.
Addr = Builder.CreateBitCast(Addr, CGF.Int8Ty->getPointerTo(AS));
// Apply the offset, which we assume is non-null.
Addr = Builder.CreateInBoundsGEP(Addr, FieldOffset, "memptr.offset");
// Cast the address to the appropriate pointer type, adopting the address
// space of the base pointer.
return Builder.CreateBitCast(Addr, PType);
}
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Use constant emission if we can.
if (isa<llvm::Constant>(Src))
return EmitMemberPointerConversion(E, cast<llvm::Constant>(Src));
// We may be adding or dropping fields from the member pointer, so we need
// both types and the inheritance models of both records.
const MemberPointerType *SrcTy =
E->getSubExpr()->getType()->castAs<MemberPointerType>();
const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
bool IsFunc = SrcTy->isMemberFunctionPointer();
// If the classes use the same null representation, reinterpret_cast is a nop.
bool IsReinterpret = E->getCastKind() == CK_ReinterpretMemberPointer;
if (IsReinterpret && IsFunc)
return Src;
CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
if (IsReinterpret &&
SrcRD->nullFieldOffsetIsZero() == DstRD->nullFieldOffsetIsZero())
return Src;
CGBuilderTy &Builder = CGF.Builder;
// Branch past the conversion if Src is null.
llvm::Value *IsNotNull = EmitMemberPointerIsNotNull(CGF, Src, SrcTy);
llvm::Constant *DstNull = EmitNullMemberPointer(DstTy);
// C++ 5.2.10p9: The null member pointer value is converted to the null member
// pointer value of the destination type.
if (IsReinterpret) {
// For reinterpret casts, sema ensures that src and dst are both functions
// or data and have the same size, which means the LLVM types should match.
assert(Src->getType() == DstNull->getType());
return Builder.CreateSelect(IsNotNull, Src, DstNull);
}
llvm::BasicBlock *OriginalBB = Builder.GetInsertBlock();
llvm::BasicBlock *ConvertBB = CGF.createBasicBlock("memptr.convert");
llvm::BasicBlock *ContinueBB = CGF.createBasicBlock("memptr.converted");
Builder.CreateCondBr(IsNotNull, ConvertBB, ContinueBB);
CGF.EmitBlock(ConvertBB);
llvm::Value *Dst = EmitNonNullMemberPointerConversion(
SrcTy, DstTy, E->getCastKind(), E->path_begin(), E->path_end(), Src,
Builder);
Builder.CreateBr(ContinueBB);
// In the continuation, choose between DstNull and Dst.
CGF.EmitBlock(ContinueBB);
llvm::PHINode *Phi = Builder.CreatePHI(DstNull->getType(), 2, "memptr.converted");
Phi->addIncoming(DstNull, OriginalBB);
Phi->addIncoming(Dst, ConvertBB);
return Phi;
}
llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
CGBuilderTy &Builder) {
const CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
const CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel();
bool IsFunc = SrcTy->isMemberFunctionPointer();
bool IsConstant = isa<llvm::Constant>(Src);
// Decompose src.
llvm::Value *FirstField = Src;
llvm::Value *NonVirtualBaseAdjustment = getZeroInt();
llvm::Value *VirtualBaseAdjustmentOffset = getZeroInt();
llvm::Value *VBPtrOffset = getZeroInt();
if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
// We need to extract values.
unsigned I = 0;
FirstField = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance))
VBPtrOffset = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++);
}
bool IsDerivedToBase = (CK == CK_DerivedToBaseMemberPointer);
const MemberPointerType *DerivedTy = IsDerivedToBase ? SrcTy : DstTy;
const CXXRecordDecl *DerivedClass = DerivedTy->getMostRecentCXXRecordDecl();
// For data pointers, we adjust the field offset directly. For functions, we
// have a separate field.
llvm::Value *&NVAdjustField = IsFunc ? NonVirtualBaseAdjustment : FirstField;
// The virtual inheritance model has a quirk: the virtual base table is always
// referenced when dereferencing a member pointer even if the member pointer
// is non-virtual. This is accounted for by adjusting the non-virtual offset
// to point backwards to the top of the MDC from the first VBase. Undo this
// adjustment to normalize the member pointer.
llvm::Value *SrcVBIndexEqZero =
Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
if (SrcInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
if (int64_t SrcOffsetToFirstVBase =
getContext().getOffsetOfBaseWithVBPtr(SrcRD).getQuantity()) {
llvm::Value *UndoSrcAdjustment = Builder.CreateSelect(
SrcVBIndexEqZero,
llvm::ConstantInt::get(CGM.IntTy, SrcOffsetToFirstVBase),
getZeroInt());
NVAdjustField = Builder.CreateNSWAdd(NVAdjustField, UndoSrcAdjustment);
}
}
// A non-zero vbindex implies that we are dealing with a source member in a
// floating virtual base in addition to some non-virtual offset. If the
// vbindex is zero, we are dealing with a source that exists in a non-virtual,
// fixed, base. The difference between these two cases is that the vbindex +
// nvoffset *always* point to the member regardless of what context they are
// evaluated in so long as the vbindex is adjusted. A member inside a fixed
// base requires explicit nv adjustment.
llvm::Constant *BaseClassOffset = llvm::ConstantInt::get(
CGM.IntTy,
CGM.computeNonVirtualBaseClassOffset(DerivedClass, PathBegin, PathEnd)
.getQuantity());
llvm::Value *NVDisp;
if (IsDerivedToBase)
NVDisp = Builder.CreateNSWSub(NVAdjustField, BaseClassOffset, "adj");
else
NVDisp = Builder.CreateNSWAdd(NVAdjustField, BaseClassOffset, "adj");
NVAdjustField = Builder.CreateSelect(SrcVBIndexEqZero, NVDisp, getZeroInt());
// Update the vbindex to an appropriate value in the destination because
// SrcRD's vbtable might not be a strict prefix of the one in DstRD.
llvm::Value *DstVBIndexEqZero = SrcVBIndexEqZero;
if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance) &&
MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) {
if (llvm::GlobalVariable *VDispMap =
getAddrOfVirtualDisplacementMap(SrcRD, DstRD)) {
llvm::Value *VBIndex = Builder.CreateExactUDiv(
VirtualBaseAdjustmentOffset, llvm::ConstantInt::get(CGM.IntTy, 4));
if (IsConstant) {
llvm::Constant *Mapping = VDispMap->getInitializer();
VirtualBaseAdjustmentOffset =
Mapping->getAggregateElement(cast<llvm::Constant>(VBIndex));
} else {
llvm::Value *Idxs[] = {getZeroInt(), VBIndex};
VirtualBaseAdjustmentOffset =
Builder.CreateAlignedLoad(Builder.CreateInBoundsGEP(VDispMap, Idxs),
CharUnits::fromQuantity(4));
}
DstVBIndexEqZero =
Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
}
}
// Set the VBPtrOffset to zero if the vbindex is zero. Otherwise, initialize
// it to the offset of the vbptr.
if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) {
llvm::Value *DstVBPtrOffset = llvm::ConstantInt::get(
CGM.IntTy,
getContext().getASTRecordLayout(DstRD).getVBPtrOffset().getQuantity());
VBPtrOffset =
Builder.CreateSelect(DstVBIndexEqZero, getZeroInt(), DstVBPtrOffset);
}
// Likewise, apply a similar adjustment so that dereferencing the member
// pointer correctly accounts for the distance between the start of the first
// virtual base and the top of the MDC.
if (DstInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
if (int64_t DstOffsetToFirstVBase =
getContext().getOffsetOfBaseWithVBPtr(DstRD).getQuantity()) {
llvm::Value *DoDstAdjustment = Builder.CreateSelect(
DstVBIndexEqZero,
llvm::ConstantInt::get(CGM.IntTy, DstOffsetToFirstVBase),
getZeroInt());
NVAdjustField = Builder.CreateNSWSub(NVAdjustField, DoDstAdjustment);
}
}
// Recompose dst from the null struct and the adjusted fields from src.
llvm::Value *Dst;
if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) {
Dst = FirstField;
} else {
Dst = llvm::UndefValue::get(ConvertMemberPointerType(DstTy));
unsigned Idx = 0;
Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++);
if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
Dst = Builder.CreateInsertValue(Dst, NonVirtualBaseAdjustment, Idx++);
if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(Dst, VBPtrOffset, Idx++);
if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(Dst, VirtualBaseAdjustmentOffset, Idx++);
}
return Dst;
}
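// Worked example of the recomposition (hedged): converting
//   int B::*  ->  int D::*   with  struct B { int x; };  struct D : B {};
// both sides use the single-inheritance model, so Dst is just FieldOffset
// plus the non-virtual offset of B within D (zero here); the vbindex-driven
// selects above only do real work when a virtual-inheritance model is
// involved.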
llvm::Constant *
MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) {
const MemberPointerType *SrcTy =
E->getSubExpr()->getType()->castAs<MemberPointerType>();
const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
CastKind CK = E->getCastKind();
return EmitMemberPointerConversion(SrcTy, DstTy, CK, E->path_begin(),
E->path_end(), Src);
}
llvm::Constant *MicrosoftCXXABI::EmitMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Constant *Src) {
assert(CK == CK_DerivedToBaseMemberPointer ||
CK == CK_BaseToDerivedMemberPointer ||
CK == CK_ReinterpretMemberPointer);
// If src is null, emit a new null for dst. We can't return src because dst
// might have a new representation.
if (MemberPointerConstantIsNull(SrcTy, Src))
return EmitNullMemberPointer(DstTy);
// We don't need to do anything for reinterpret_casts of non-null member
// pointers. We should only get here when the two type representations have
// the same size.
if (CK == CK_ReinterpretMemberPointer)
return Src;
CGBuilderTy Builder(CGM, CGM.getLLVMContext());
auto *Dst = cast<llvm::Constant>(EmitNonNullMemberPointerConversion(
SrcTy, DstTy, CK, PathBegin, PathEnd, Src, Builder));
return Dst;
}
CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address This,
llvm::Value *&ThisPtrForCall, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MPT->isMemberFunctionPointer());
const FunctionProtoType *FPT =
MPT->getPointeeType()->castAs<FunctionProtoType>();
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
CGBuilderTy &Builder = CGF.Builder;
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
llvm::Value *FunctionPointer = MemPtr;
llvm::Value *NonVirtualBaseAdjustment = nullptr;
llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
llvm::Value *VBPtrOffset = nullptr;
if (MemPtr->getType()->isStructTy()) {
// We need to extract values.
unsigned I = 0;
FunctionPointer = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasNVOffsetField(MPT, Inheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
if (VirtualBaseAdjustmentOffset) {
ThisPtrForCall = AdjustVirtualBase(CGF, E, RD, This,
VirtualBaseAdjustmentOffset, VBPtrOffset);
} else {
ThisPtrForCall = This.getPointer();
}
if (NonVirtualBaseAdjustment) {
// Apply the adjustment and cast back to the original struct type.
llvm::Value *Ptr = Builder.CreateBitCast(ThisPtrForCall, CGF.Int8PtrTy);
Ptr = Builder.CreateInBoundsGEP(Ptr, NonVirtualBaseAdjustment);
ThisPtrForCall = Builder.CreateBitCast(Ptr, ThisPtrForCall->getType(),
"this.adjusted");
}
FunctionPointer =
Builder.CreateBitCast(FunctionPointer, FTy->getPointerTo());
CGCallee Callee(FPT, FunctionPointer);
return Callee;
}
CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) {
return new MicrosoftCXXABI(CGM);
}
// MS RTTI Overview:
// The run time type information emitted by cl.exe contains 5 distinct types of
// structures. Many of them reference each other.
//
// TypeInfo: Static classes that are returned by typeid.
//
// CompleteObjectLocator: Referenced by vftables. They contain information
// required for dynamic casting, including OffsetFromTop. They also contain
// a reference to the TypeInfo for the type and a reference to the
// CompleteHierarchyDescriptor for the type.
//
// ClassHierarchyDescriptor: Contains information about a class hierarchy.
// Used during dynamic_cast to walk a class hierarchy. References a base
// class array and the size of said array.
//
// BaseClassArray: Contains a list of classes in a hierarchy. BaseClassArray is
// somewhat of a misnomer because the most derived class is also in the list,
// as are multiple copies of virtual bases (if they occur multiple times
// in the hierarchy.) The BaseClassArray contains one BaseClassDescriptor for
// every path in the hierarchy, in pre-order depth first order. Note, we do
// not declare a specific llvm type for BaseClassArray, it's merely an array
// of BaseClassDescriptor pointers.
//
// BaseClassDescriptor: Contains information about a class in a class hierarchy.
// BaseClassDescriptor is also somewhat of a misnomer for the same reason that
// BaseClassArray is. It contains information about a class within a
// hierarchy such as: whether this base is ambiguous and what is its offset in the
// vbtable. The names of the BaseClassDescriptors have all of their fields
// mangled into them so they can be aggressively deduplicated by the linker.
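// Illustrative shape (hedged): for
//   struct A { virtual ~A(); };  struct B { virtual ~B(); };
//   struct C : A, B {};
// each of C's two vftables is preceded by a CompleteObjectLocator; both refer
// to C's TypeDescriptor and to one ClassHierarchyDescriptor that records 3
// classes with the branching flag set, whose BaseClassArray lists
// BaseClassDescriptors for C, A, B in pre-order.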
static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
StringRef MangledName("\01??_7type_info@@6B@");
if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
return VTable;
return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*Constant=*/true,
llvm::GlobalVariable::ExternalLinkage,
/*Initializer=*/nullptr, MangledName);
}
namespace {
/// \brief A helper struct that stores information about a class in a class
/// hierarchy. The information stored in these structs is used during the
/// generation of ClassHierarchyDescriptors and BaseClassDescriptors.
// During RTTI creation, MSRTTIClasses are stored in a contiguous array with
// implicit depth first pre-order tree connectivity. getFirstChild and
// getNextChild allow us to walk the tree efficiently.
struct MSRTTIClass {
enum {
IsPrivateOnPath = 1 | 8,
IsAmbiguous = 2,
IsPrivate = 4,
IsVirtual = 16,
HasHierarchyDescriptor = 64
};
MSRTTIClass(const CXXRecordDecl *RD) : RD(RD) {}
uint32_t initialize(const MSRTTIClass *Parent,
const CXXBaseSpecifier *Specifier);
MSRTTIClass *getFirstChild() { return this + 1; }
static MSRTTIClass *getNextChild(MSRTTIClass *Child) {
return Child + 1 + Child->NumBases;
}
const CXXRecordDecl *RD, *VirtualRoot;
uint32_t Flags, NumBases, OffsetInVBase;
};
/// \brief Recursively initialize the base class array.
uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent,
const CXXBaseSpecifier *Specifier) {
Flags = HasHierarchyDescriptor;
if (!Parent) {
VirtualRoot = nullptr;
OffsetInVBase = 0;
} else {
if (Specifier->getAccessSpecifier() != AS_public)
Flags |= IsPrivate | IsPrivateOnPath;
if (Specifier->isVirtual()) {
Flags |= IsVirtual;
VirtualRoot = RD;
OffsetInVBase = 0;
} else {
if (Parent->Flags & IsPrivateOnPath)
Flags |= IsPrivateOnPath;
VirtualRoot = Parent->VirtualRoot;
OffsetInVBase = Parent->OffsetInVBase + RD->getASTContext()
.getASTRecordLayout(Parent->RD).getBaseClassOffset(RD).getQuantity();
}
}
NumBases = 0;
MSRTTIClass *Child = getFirstChild();
for (const CXXBaseSpecifier &Base : RD->bases()) {
NumBases += Child->initialize(this, &Base) + 1;
Child = getNextChild(Child);
}
return NumBases;
}
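// Illustrative serialization (hedged): for
//   struct A {};  struct B : A {};  struct C : A, B {};
// the pre-order array is [C, A, B, A]; after initialize(), C.NumBases == 3
// and B.NumBases == 1, which is what lets getNextChild skip whole subtrees.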
static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
switch (Ty->getLinkage()) {
case NoLinkage:
case InternalLinkage:
case UniqueExternalLinkage:
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
case ModuleInternalLinkage:
case ModuleLinkage:
case ExternalLinkage:
return llvm::GlobalValue::LinkOnceODRLinkage;
}
llvm_unreachable("Invalid linkage!");
}
/// \brief An ephemeral helper class for building MS RTTI types. It caches some
/// calls to the module and information about the most derived class in a
/// hierarchy.
struct MSRTTIBuilder {
enum {
HasBranchingHierarchy = 1,
HasVirtualBranchingHierarchy = 2,
HasAmbiguousBases = 4
};
MSRTTIBuilder(MicrosoftCXXABI &ABI, const CXXRecordDecl *RD)
: CGM(ABI.CGM), Context(CGM.getContext()),
VMContext(CGM.getLLVMContext()), Module(CGM.getModule()), RD(RD),
Linkage(getLinkageForRTTI(CGM.getContext().getTagDeclType(RD))),
ABI(ABI) {}
llvm::GlobalVariable *getBaseClassDescriptor(const MSRTTIClass &Classes);
llvm::GlobalVariable *
getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes);
llvm::GlobalVariable *getClassHierarchyDescriptor();
llvm::GlobalVariable *getCompleteObjectLocator(const VPtrInfo &Info);
CodeGenModule &CGM;
ASTContext &Context;
llvm::LLVMContext &VMContext;
llvm::Module &Module;
const CXXRecordDecl *RD;
llvm::GlobalVariable::LinkageTypes Linkage;
MicrosoftCXXABI &ABI;
};
} // namespace
/// \brief Recursively serializes a class hierarchy in pre-order depth first
/// order.
static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes,
const CXXRecordDecl *RD) {
Classes.push_back(MSRTTIClass(RD));
for (const CXXBaseSpecifier &Base : RD->bases())
serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl());
}
/// \brief Find ambiguity among base classes.
static void
detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) {
llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 8> AmbiguousBases;
for (MSRTTIClass *Class = &Classes.front(); Class <= &Classes.back();) {
if ((Class->Flags & MSRTTIClass::IsVirtual) &&
!VirtualBases.insert(Class->RD).second) {
Class = MSRTTIClass::getNextChild(Class);
continue;
}
if (!UniqueBases.insert(Class->RD).second)
AmbiguousBases.insert(Class->RD);
Class++;
}
if (AmbiguousBases.empty())
return;
for (MSRTTIClass &Class : Classes)
if (AmbiguousBases.count(Class.RD))
Class.Flags |= MSRTTIClass::IsAmbiguous;
}
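// Illustrative ambiguity (hedged): in
//   struct A {};  struct B : A {};  struct C : A {};  struct D : B, C {};
// A is reached along two non-virtual paths, so the second insertion into
// UniqueBases fails and both A entries in the array get IsAmbiguous.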
llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIClassHierarchyDescriptor(RD, Out);
}
// Check to see if we've already declared this ClassHierarchyDescriptor.
if (auto CHD = Module.getNamedGlobal(MangledName))
return CHD;
// Serialize the class hierarchy and initialize the CHD Fields.
SmallVector<MSRTTIClass, 8> Classes;
serializeClassHierarchy(Classes, RD);
Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
detectAmbiguousBases(Classes);
int Flags = 0;
for (auto Class : Classes) {
if (Class.RD->getNumBases() > 1)
Flags |= HasBranchingHierarchy;
// Note: cl.exe does not calculate "HasAmbiguousBases" correctly. We
// believe the field isn't actually used.
if (Class.Flags & MSRTTIClass::IsAmbiguous)
Flags |= HasAmbiguousBases;
}
if ((Flags & HasBranchingHierarchy) && RD->getNumVBases() != 0)
Flags |= HasVirtualBranchingHierarchy;
// These gep indices are used to get the address of the first element of the
// base class array.
llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0),
llvm::ConstantInt::get(CGM.IntTy, 0)};
// Forward-declare the class hierarchy descriptor
auto Type = ABI.getClassHierarchyDescriptorType();
auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr,
MangledName);
if (CHD->isWeakForLinker())
CHD->setComdat(CGM.getModule().getOrInsertComdat(CHD->getName()));
auto *Bases = getBaseClassArray(Classes);
// Initialize the base class ClassHierarchyDescriptor.
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, 0), // reserved by the runtime
llvm::ConstantInt::get(CGM.IntTy, Flags),
llvm::ConstantInt::get(CGM.IntTy, Classes.size()),
ABI.getImageRelativeConstant(llvm::ConstantExpr::getInBoundsGetElementPtr(
Bases->getValueType(), Bases,
llvm::ArrayRef<llvm::Value *>(GEPIndices))),
};
CHD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
return CHD;
}
llvm::GlobalVariable *
MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIBaseClassArray(RD, Out);
}
// Forward-declare the base class array.
// cl.exe pads the base class array with 1 (in 32-bit mode) or 4 (in 64-bit
// mode) bytes of padding. We provide a pointer-sized amount of padding by
// adding +1 to Classes.size(). The sections have pointer alignment and are
// marked pick-any so it shouldn't matter.
llvm::Type *PtrType = ABI.getImageRelativeType(
ABI.getBaseClassDescriptorType()->getPointerTo());
auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
auto *BCA =
new llvm::GlobalVariable(Module, ArrType,
/*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
if (BCA->isWeakForLinker())
BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
// Initialize the BaseClassArray.
SmallVector<llvm::Constant *, 8> BaseClassArrayData;
for (MSRTTIClass &Class : Classes)
BaseClassArrayData.push_back(
ABI.getImageRelativeConstant(getBaseClassDescriptor(Class)));
BaseClassArrayData.push_back(llvm::Constant::getNullValue(PtrType));
BCA->setInitializer(llvm::ConstantArray::get(ArrType, BaseClassArrayData));
return BCA;
}
llvm::GlobalVariable *
MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) {
// Compute the fields for the BaseClassDescriptor. They are computed up front
// because they are mangled into the name of the object.
uint32_t OffsetInVBTable = 0;
int32_t VBPtrOffset = -1;
if (Class.VirtualRoot) {
auto &VTableContext = CGM.getMicrosoftVTableContext();
OffsetInVBTable = VTableContext.getVBTableIndex(RD, Class.VirtualRoot) * 4;
VBPtrOffset = Context.getASTRecordLayout(RD).getVBPtrOffset().getQuantity();
}
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIBaseClassDescriptor(
Class.RD, Class.OffsetInVBase, VBPtrOffset, OffsetInVBTable,
Class.Flags, Out);
}
// Check to see if we've already declared this object.
if (auto BCD = Module.getNamedGlobal(MangledName))
return BCD;
// Forward-declare the base class descriptor.
auto Type = ABI.getBaseClassDescriptorType();
auto BCD =
new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
if (BCD->isWeakForLinker())
BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
// Initialize the BaseClassDescriptor.
llvm::Constant *Fields[] = {
ABI.getImageRelativeConstant(
ABI.getAddrOfRTTIDescriptor(Context.getTypeDeclType(Class.RD))),
llvm::ConstantInt::get(CGM.IntTy, Class.NumBases),
llvm::ConstantInt::get(CGM.IntTy, Class.OffsetInVBase),
llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
llvm::ConstantInt::get(CGM.IntTy, OffsetInVBTable),
llvm::ConstantInt::get(CGM.IntTy, Class.Flags),
ABI.getImageRelativeConstant(
MSRTTIBuilder(ABI, Class.RD).getClassHierarchyDescriptor()),
};
BCD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
return BCD;
}
llvm::GlobalVariable *
MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo &Info) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTICompleteObjectLocator(RD, Info.MangledPath, Out);
}
// Check to see if we've already computed this complete object locator.
if (auto COL = Module.getNamedGlobal(MangledName))
return COL;
// Compute the fields of the complete object locator.
int OffsetToTop = Info.FullOffsetInMDC.getQuantity();
int VFPtrOffset = 0;
// The offset includes the vtordisp if one exists.
if (const CXXRecordDecl *VBase = Info.getVBaseWithVPtr())
if (Context.getASTRecordLayout(RD)
.getVBaseOffsetsMap()
.find(VBase)
->second.hasVtorDisp())
VFPtrOffset = Info.NonVirtualOffset.getQuantity() + 4;
// Forward-declare the complete object locator.
llvm::StructType *Type = ABI.getCompleteObjectLocatorType();
auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
// Initialize the CompleteObjectLocator.
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, ABI.isImageRelative()),
llvm::ConstantInt::get(CGM.IntTy, OffsetToTop),
llvm::ConstantInt::get(CGM.IntTy, VFPtrOffset),
ABI.getImageRelativeConstant(
CGM.GetAddrOfRTTIDescriptor(Context.getTypeDeclType(RD))),
ABI.getImageRelativeConstant(getClassHierarchyDescriptor()),
ABI.getImageRelativeConstant(COL),
};
llvm::ArrayRef<llvm::Constant *> FieldsRef(Fields);
if (!ABI.isImageRelative())
FieldsRef = FieldsRef.drop_back();
COL->setInitializer(llvm::ConstantStruct::get(Type, FieldsRef));
if (COL->isWeakForLinker())
COL->setComdat(CGM.getModule().getOrInsertComdat(COL->getName()));
return COL;
}
static QualType decomposeTypeForEH(ASTContext &Context, QualType T,
bool &IsConst, bool &IsVolatile,
bool &IsUnaligned) {
T = Context.getExceptionObjectType(T);
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a qualification conversion
IsConst = false;
IsVolatile = false;
IsUnaligned = false;
QualType PointeeType = T->getPointeeType();
if (!PointeeType.isNull()) {
IsConst = PointeeType.isConstQualified();
IsVolatile = PointeeType.isVolatileQualified();
IsUnaligned = PointeeType.getQualifiers().hasUnaligned();
}
// Member pointer types like "const int A::*" are represented by having RTTI
// for "int A::*" and separately storing the const qualifier.
if (const auto *MPTy = T->getAs<MemberPointerType>())
T = Context.getMemberPointerType(PointeeType.getUnqualifiedType(),
MPTy->getClass());
// Pointer types like "const int * const *" are represented by having RTTI
// for "const int **" and separately storing the const qualifier.
if (T->isPointerType())
T = Context.getPointerType(PointeeType.getUnqualifiedType());
return T;
}
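// Worked example of the decomposition (hedged): a handler of type
//   const int *const *
// is decomposed into the key type `const int **` with IsConst == true, so one
// TypeDescriptor serves all cv-qualified variants of the handler type.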
CatchTypeInfo
MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
QualType CatchHandlerType) {
// TypeDescriptors for exceptions never have qualified pointer types,
// qualifiers are stored separately in order to support qualification
// conversions.
bool IsConst, IsVolatile, IsUnaligned;
Type =
decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile, IsUnaligned);
bool IsReference = CatchHandlerType->isReferenceType();
uint32_t Flags = 0;
if (IsConst)
Flags |= 1;
if (IsVolatile)
Flags |= 2;
if (IsUnaligned)
Flags |= 4;
if (IsReference)
Flags |= 8;
return CatchTypeInfo{getAddrOfRTTIDescriptor(Type)->stripPointerCasts(),
Flags};
}
/// \brief Gets a TypeDescriptor. Returns an llvm::Constant * rather than an
/// llvm::GlobalVariable * because different type descriptors have different
/// types, and need to be abstracted. They are abstracted by casting the
/// address to an Int8PtrTy.
llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXRTTI(Type, Out);
}
// Check to see if we've already declared this TypeDescriptor.
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
// Note for the future: If we would ever like to do deferred emission of
// RTTI, check whether emitting vtables opportunistically needs any adjustment.
// Compute the fields for the TypeDescriptor.
SmallString<256> TypeInfoString;
{
llvm::raw_svector_ostream Out(TypeInfoString);
getMangleContext().mangleCXXRTTIName(Type, Out);
}
// Declare and initialize the TypeDescriptor.
llvm::Constant *Fields[] = {
getTypeInfoVTable(CGM), // VFPtr
llvm::ConstantPointerNull::get(CGM.Int8PtrTy), // Runtime data
llvm::ConstantDataArray::getString(CGM.getLLVMContext(), TypeInfoString)};
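// The layout matches the CRT's TypeDescriptor (a sketch; field names follow
// the public MSVC headers):
//   { const void *pVFTable; void *spare; char name[]; }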
llvm::StructType *TypeDescriptorType =
getTypeDescriptorType(TypeInfoString);
auto *Var = new llvm::GlobalVariable(
CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
getLinkageForRTTI(Type),
llvm::ConstantStruct::get(TypeDescriptorType, Fields),
MangledName);
if (Var->isWeakForLinker())
Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName()));
return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
}
/// \brief Gets or creates a Microsoft CompleteObjectLocator.
llvm::GlobalVariable *
MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
const VPtrInfo &Info) {
return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info);
}
static void emitCXXConstructor(CodeGenModule &CGM,
const CXXConstructorDecl *ctor,
StructorType ctorType) {
// There are no constructor variants; always emit the complete constructor.
llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete);
CGM.maybeSetTrivialComdat(*ctor, *Fn);
}
static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
StructorType dtorType) {
// The complete destructor is equivalent to the base destructor for
// classes with no virtual bases, so try to emit it as an alias.
if (!dtor->getParent()->getNumVBases() &&
(dtorType == StructorType::Complete || dtorType == StructorType::Base)) {
bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias(
GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base), true);
if (ProducedAlias) {
if (dtorType == StructorType::Complete)
return;
if (dtor->isVirtual())
CGM.getVTables().EmitThunks(GlobalDecl(dtor, Dtor_Complete));
}
}
// The base destructor is equivalent to the base destructor of its
// base class if there is exactly one non-virtual base class with a
// non-trivial destructor, there are no fields with a non-trivial
// destructor, and the body of the destructor is trivial.
if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
return;
llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType);
if (Fn->isWeakForLinker())
Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
}
void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
StructorType Type) {
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
emitCXXConstructor(CGM, CD, Type);
return;
}
emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type);
}
llvm::Function *
MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT) {
assert(CT == Ctor_CopyingClosure || CT == Ctor_DefaultClosure);
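// A copying closure wraps a copy constructor whose signature is not the plain
// (this, src) form (e.g. it takes extra defaulted parameters or uses a
// non-default calling convention), so the EH runtime can invoke it uniformly;
// a default closure plays the same role for a defaulted constructor.
// (Illustrative summary.)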
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
getMangleContext().mangleCXXCtor(CD, CT, Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
// Create the llvm::Function.
const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSCtorClosure(CD, CT);
llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
const CXXRecordDecl *RD = CD->getParent();
QualType RecordTy = getContext().getRecordType(RD);
llvm::Function *ThunkFn = llvm::Function::Create(
ThunkTy, getLinkageForRTTI(RecordTy), ThunkName.str(), &CGM.getModule());
ThunkFn->setCallingConv(static_cast<llvm::CallingConv::ID>(
FnInfo.getEffectiveCallingConvention()));
if (ThunkFn->isWeakForLinker())
ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
bool IsCopy = CT == Ctor_CopyingClosure;
// Start codegen.
CodeGenFunction CGF(CGM);
CGF.CurGD = GlobalDecl(CD, Ctor_Complete);
// Build FunctionArgs.
FunctionArgList FunctionArgs;
// A constructor always starts with a 'this' pointer as its first argument.
buildThisParam(CGF, FunctionArgs);
// Following the 'this' pointer is a reference to the source object that we
// are copying from.
ImplicitParamDecl SrcParam(
getContext(), /*DC=*/nullptr, SourceLocation(),
&getContext().Idents.get("src"),
getContext().getLValueReferenceType(RecordTy,
/*SpelledAsLValue=*/true),
ImplicitParamDecl::Other);
if (IsCopy)
FunctionArgs.push_back(&SrcParam);
// Constructors for classes which utilize virtual bases have an additional
// parameter which indicates whether or not it is being delegated to by a more
// derived constructor.
ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr,
SourceLocation(),
&getContext().Idents.get("is_most_derived"),
getContext().IntTy, ImplicitParamDecl::Other);
// Only add the parameter to the list if the class has virtual bases.
if (RD->getNumVBases() > 0)
FunctionArgs.push_back(&IsMostDerived);
// Start defining the function.
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
FunctionArgs, CD->getLocation(), SourceLocation());
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(CGF);
EmitThisParam(CGF);
llvm::Value *This = getThisValue(CGF);
llvm::Value *SrcVal =
IsCopy ? CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&SrcParam), "src")
: nullptr;
CallArgList Args;
// Push the this ptr.
Args.add(RValue::get(This), CD->getThisType(getContext()));
// Push the src ptr.
if (SrcVal)
Args.add(RValue::get(SrcVal), SrcParam.getType());
// Add the rest of the default arguments.
SmallVector<const Stmt *, 4> ArgVec;
ArrayRef<ParmVarDecl *> params = CD->parameters().drop_front(IsCopy ? 1 : 0);
for (const ParmVarDecl *PD : params) {
assert(PD->hasDefaultArg() && "ctor closure lacks default args");
ArgVec.push_back(PD->getDefaultArg());
}
CodeGenFunction::RunCleanupsScope Cleanups(CGF);
const auto *FPT = CD->getType()->castAs<FunctionProtoType>();
CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
AddedStructorArgs ExtraArgs =
addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
/*ForVirtualBase=*/false,
/*Delegating=*/false, Args);
// Call the constructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
Cleanups.ForceCleanup();
// Emit the ret instruction and remove any temporary instructions created to
// aid CodeGen.
CGF.FinishFunction(SourceLocation());
return ThunkFn;
}
llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
uint32_t NVOffset,
int32_t VBPtrOffset,
uint32_t VBIndex) {
assert(!T->isReferenceType());
CXXRecordDecl *RD = T->getAsCXXRecordDecl();
const CXXConstructorDecl *CD =
RD ? CGM.getContext().getCopyConstructorForExceptionObject(RD) : nullptr;
CXXCtorType CT = Ctor_Complete;
if (CD)
if (!hasDefaultCXXMethodCC(getContext(), CD) || CD->getNumParams() != 1)
CT = Ctor_CopyingClosure;
uint32_t Size = getContext().getTypeSizeInChars(T).getQuantity();
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXCatchableType(T, CD, CT, Size, NVOffset,
VBPtrOffset, VBIndex, Out);
}
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return getImageRelativeConstant(GV);
// The TypeDescriptor is used by the runtime to determine if a catch handler
// is appropriate for the exception object.
llvm::Constant *TD = getImageRelativeConstant(getAddrOfRTTIDescriptor(T));
// The runtime is responsible for calling the copy constructor if the
// exception is caught by value.
llvm::Constant *CopyCtor;
if (CD) {
if (CT == Ctor_CopyingClosure)
CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
else
CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
} else {
CopyCtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
}
CopyCtor = getImageRelativeConstant(CopyCtor);
bool IsScalar = !RD;
bool HasVirtualBases = false;
bool IsStdBadAlloc = false; // std::bad_alloc is special for some reason.
QualType PointeeType = T;
if (T->isPointerType())
PointeeType = T->getPointeeType();
if (const CXXRecordDecl *RD = PointeeType->getAsCXXRecordDecl()) {
HasVirtualBases = RD->getNumVBases() > 0;
if (IdentifierInfo *II = RD->getIdentifier())
IsStdBadAlloc = II->isStr("bad_alloc") && RD->isInStdNamespace();
}
// Encode the relevant CatchableType properties into the Flags bitfield.
// FIXME: Figure out how bits 2 or 8 can get set.
uint32_t Flags = 0;
if (IsScalar)
Flags |= 1;
if (HasVirtualBases)
Flags |= 4;
if (IsStdBadAlloc)
Flags |= 16;
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
TD, // TypeDescriptor
llvm::ConstantInt::get(CGM.IntTy, NVOffset), // NonVirtualAdjustment
llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset), // OffsetToVBPtr
llvm::ConstantInt::get(CGM.IntTy, VBIndex), // VBTableIndex
llvm::ConstantInt::get(CGM.IntTy, Size), // Size
CopyCtor // CopyCtor
};
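// The field order mirrors the CRT's _s_CatchableType from ehdata.h (a sketch;
// names follow the public MSVC headers):
//   { properties, pType, thisDisplacement{mdisp, pdisp, vdisp},
//     sizeOrOffset, copyFunction }
// with NVOffset/VBPtrOffset/VBIndex filling the PMD displacement fields.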
llvm::StructType *CTType = getCatchableTypeType();
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTType, Fields), MangledName);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
return getImageRelativeConstant(GV);
}
llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
assert(!T->isReferenceType());
// See if we've already generated a CatchableTypeArray for this type before.
llvm::GlobalVariable *&CTA = CatchableTypeArrays[T];
if (CTA)
return CTA;
// Ensure that we don't have duplicate entries in our CatchableTypeArray by
// using a SmallSetVector. Duplicates may arise due to virtual bases
// occurring more than once in the hierarchy.
llvm::SmallSetVector<llvm::Constant *, 2> CatchableTypes;
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or cv T& and T is an unambiguous public
// base class of E, or
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a standard pointer conversion (4.10) not involving conversions to
// pointers to private or protected or ambiguous classes
const CXXRecordDecl *MostDerivedClass = nullptr;
bool IsPointer = T->isPointerType();
if (IsPointer)
MostDerivedClass = T->getPointeeType()->getAsCXXRecordDecl();
else
MostDerivedClass = T->getAsCXXRecordDecl();
// Collect all the unambiguous public bases of the MostDerivedClass.
if (MostDerivedClass) {
const ASTContext &Context = getContext();
const ASTRecordLayout &MostDerivedLayout =
Context.getASTRecordLayout(MostDerivedClass);
MicrosoftVTableContext &VTableContext = CGM.getMicrosoftVTableContext();
SmallVector<MSRTTIClass, 8> Classes;
serializeClassHierarchy(Classes, MostDerivedClass);
Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
detectAmbiguousBases(Classes);
for (const MSRTTIClass &Class : Classes) {
// Skip any ambiguous or private bases.
if (Class.Flags &
(MSRTTIClass::IsPrivateOnPath | MSRTTIClass::IsAmbiguous))
continue;
// Write down how to convert from a derived pointer to a base pointer.
uint32_t OffsetInVBTable = 0;
int32_t VBPtrOffset = -1;
if (Class.VirtualRoot) {
OffsetInVBTable =
VTableContext.getVBTableIndex(MostDerivedClass, Class.VirtualRoot)*4;
VBPtrOffset = MostDerivedLayout.getVBPtrOffset().getQuantity();
}
// Turn our record back into a pointer if the exception object is a
// pointer.
QualType RTTITy = QualType(Class.RD->getTypeForDecl(), 0);
if (IsPointer)
RTTITy = Context.getPointerType(RTTITy);
CatchableTypes.insert(getCatchableType(RTTITy, Class.OffsetInVBase,
VBPtrOffset, OffsetInVBTable));
}
}
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if
// - The handler is of type cv T or cv T& and E and T are the same type
// (ignoring the top-level cv-qualifiers)
CatchableTypes.insert(getCatchableType(T));
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a standard pointer conversion (4.10) not involving conversions to
// pointers to private or protected or ambiguous classes
//
// C++14 [conv.ptr]p2:
// A prvalue of type "pointer to cv T," where T is an object type, can be
// converted to a prvalue of type "pointer to cv void".
if (IsPointer && T->getPointeeType()->isObjectType())
CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or const T& where T is a pointer or
// pointer to member type and E is std::nullptr_t.
//
// We cannot possibly list all possible pointer types here, making this
// implementation incompatible with the standard. However, MSVC includes an
// entry for pointer-to-void in this case. Let's do the same.
if (T->isNullPtrType())
CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
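// To illustrate: for "throw d" where Derived publicly derives from Base, the
// array holds CatchableTypes for Derived and Base; for "throw &d" it instead
// holds Derived*, Base*, and void*. (Illustrative example.)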
uint32_t NumEntries = CatchableTypes.size();
llvm::Type *CTType =
getImageRelativeType(getCatchableTypeType()->getPointerTo());
llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries);
llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries);
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, NumEntries), // NumEntries
llvm::ConstantArray::get(
AT, llvm::makeArrayRef(CatchableTypes.begin(),
CatchableTypes.end())) // CatchableTypes
};
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXCatchableTypeArray(T, NumEntries, Out);
}
CTA = new llvm::GlobalVariable(
CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTAType, Fields), MangledName);
CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CTA->setSection(".xdata");
if (CTA->isWeakForLinker())
CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName()));
return CTA;
}
llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
bool IsConst, IsVolatile, IsUnaligned;
T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile, IsUnaligned);
// The CatchableTypeArray enumerates the various (CV-unqualified) types that
// the exception object may be caught as.
llvm::GlobalVariable *CTA = getCatchableTypeArray(T);
// The first field in a CatchableTypeArray is the number of CatchableTypes.
// This is used as a component of the mangled name, which means that we need to
// know what it is in order to see if we have previously generated the
// ThrowInfo.
uint32_t NumEntries =
cast<llvm::ConstantInt>(CTA->getInitializer()->getAggregateElement(0U))
->getLimitedValue();
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, IsUnaligned,
NumEntries, Out);
}
// Reuse a previously generated ThrowInfo if we have generated an appropriate
// one before.
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return GV;
// The RTTI TypeDescriptor uses an unqualified type but catch clauses must
// be at least as CV qualified. Encode this requirement into the Flags
// bitfield.
uint32_t Flags = 0;
if (IsConst)
Flags |= 1;
if (IsVolatile)
Flags |= 2;
if (IsUnaligned)
Flags |= 4;
// The cleanup-function (a destructor) must be called when the exception
// object's lifetime ends.
llvm::Constant *CleanupFn = llvm::Constant::getNullValue(CGM.Int8PtrTy);
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
if (CXXDestructorDecl *DtorD = RD->getDestructor())
if (!DtorD->isTrivial())
CleanupFn = llvm::ConstantExpr::getBitCast(
CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete),
CGM.Int8PtrTy);
// This is unused as far as we can tell; initialize it to null.
llvm::Constant *ForwardCompat =
getImageRelativeConstant(llvm::Constant::getNullValue(CGM.Int8PtrTy));
llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(
llvm::ConstantExpr::getBitCast(CTA, CGM.Int8PtrTy));
llvm::StructType *TIType = getThrowInfoType();
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
getImageRelativeConstant(CleanupFn), // CleanupFn
ForwardCompat, // ForwardCompat
PointerToCatchableTypes // CatchableTypeArray
};
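// The field order mirrors the CRT's _s_ThrowInfo from ehdata.h (a sketch;
// names follow the public MSVC headers):
//   { attributes, pmfnUnwind, pForwardCompat, pCatchableTypeArray }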
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
return GV;
}
void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
const Expr *SubExpr = E->getSubExpr();
QualType ThrowType = SubExpr->getType();
// The exception object lives on the stack and its address is passed to the
// runtime function.
Address AI = CGF.CreateMemTemp(ThrowType);
CGF.EmitAnyExprToMem(SubExpr, AI, ThrowType.getQualifiers(),
/*IsInit=*/true);
// The so-called ThrowInfo is used to describe how the exception object may be
// caught.
llvm::GlobalVariable *TI = getThrowInfo(ThrowType);
// Call into the runtime to throw the exception.
llvm::Value *Args[] = {
CGF.Builder.CreateBitCast(AI.getPointer(), CGM.Int8PtrTy),
TI
};
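// The emitted call is roughly (an illustrative sketch; exact IR names vary):
//   call void @_CxxThrowException(i8* %exn.mem, %eh.ThrowInfo* @"_TI...")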
CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args);
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp (revision 322855)
@@ -1,2028 +1,2033 @@
//===--- Darwin.cpp - Darwin Tool and ToolChain Implementations -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Darwin.h"
#include "Arch/ARM.h"
#include "CommonArgs.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include <cstdlib> // ::getenv
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
// The matching this routine does is fairly pointless, since it is neither the
// complete architecture list, nor a reasonable subset. The problem is that
// historically the driver driver accepts this and also ties its -march=
// handling to the architecture name, so we need to be careful before removing
// support for it.
// This code must be kept in sync with Clang's Darwin specific argument
// translation.
return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
.Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
.Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
.Case("ppc64", llvm::Triple::ppc64)
.Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
.Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
llvm::Triple::x86)
.Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
// This is derived from the driver driver.
.Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
.Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
.Cases("armv7s", "xscale", llvm::Triple::arm)
.Case("arm64", llvm::Triple::aarch64)
.Case("r600", llvm::Triple::r600)
.Case("amdgcn", llvm::Triple::amdgcn)
.Case("nvptx", llvm::Triple::nvptx)
.Case("nvptx64", llvm::Triple::nvptx64)
.Case("amdil", llvm::Triple::amdil)
.Case("spir", llvm::Triple::spir)
.Default(llvm::Triple::UnknownArch);
}
void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
unsigned ArchKind = llvm::ARM::parseArch(Str);
T.setArch(Arch);
if (Str == "x86_64h")
T.setArchName(Str);
else if (ArchKind == llvm::ARM::AK_ARMV6M ||
ArchKind == llvm::ARM::AK_ARMV7M ||
ArchKind == llvm::ARM::AK_ARMV7EM) {
T.setOS(llvm::Triple::UnknownOS);
T.setObjectFormat(llvm::Triple::MachO);
}
}
void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// If -fno-integrated-as is used, add -Q to the darwin assembler driver to make
// sure it runs its system assembler, not clang's integrated assembler.
// Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as.
// FIXME: at run-time detect assembler capabilities or rely on version
// information forwarded by -target-assembler-version.
if (Args.hasArg(options::OPT_fno_integrated_as)) {
const llvm::Triple &T(getToolChain().getTriple());
if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7)))
CmdArgs.push_back("-Q");
}
// Forward -g, assuming we are dealing with an actual assembly file.
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
if (Args.hasArg(options::OPT_gstabs))
CmdArgs.push_back("--gstabs");
else if (Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-g");
}
// Derived from asm spec.
AddMachOArch(Args, CmdArgs);
// Use -force_cpusubtype_ALL on x86 by default.
if (getToolChain().getArch() == llvm::Triple::x86 ||
getToolChain().getArch() == llvm::Triple::x86_64 ||
Args.hasArg(options::OPT_force__cpusubtype__ALL))
CmdArgs.push_back("-force_cpusubtype_ALL");
if (getToolChain().getArch() != llvm::Triple::x86_64 &&
(((Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext)) &&
getMachOToolChain().isKernelStatic()) ||
Args.hasArg(options::OPT_static)))
CmdArgs.push_back("-static");
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
// asm_final spec is empty.
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::MachOTool::anchor() {}
void darwin::MachOTool::AddMachOArch(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef ArchName = getMachOToolChain().getMachOArchName(Args);
// Derived from darwin_arch spec.
CmdArgs.push_back("-arch");
CmdArgs.push_back(Args.MakeArgString(ArchName));
// FIXME: Is this needed anymore?
if (ArchName == "arm")
CmdArgs.push_back("-force_cpusubtype_ALL");
}
bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const {
// We only need to generate a temp path for LTO if we aren't compiling object
// files. When compiling source files, we run 'dsymutil' after linking. We
// don't run 'dsymutil' when compiling object files.
for (const auto &Input : Inputs)
if (Input.getType() != types::TY_Object)
return true;
return false;
}
/// \brief Pass -no_deduplicate to ld64 under certain conditions:
///
/// - Either -O0 or -O1 is explicitly specified
/// - No -O option is specified *and* this is a compile+link (implicit -O0)
///
/// Also do *not* add -no_deduplicate when no -O option is specified and this
/// is just a link (we can't imply -O0)
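///
/// For example (illustrative, with ld64 >= 262): "clang -O0 main.c" and
/// "clang main.c" both end up passing -no_deduplicate, while "clang main.o"
/// (link-only, no -O given) and "clang -O2 main.c" do not.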
static bool shouldLinkerNotDedup(bool IsLinkerOnlyAction, const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O0))
return true;
if (A->getOption().matches(options::OPT_O))
return llvm::StringSwitch<bool>(A->getValue())
.Case("1", true)
.Default(false);
return false; // OPT_Ofast & OPT_O4
}
if (!IsLinkerOnlyAction) // Implicit -O0 for compile+link only.
return true;
return false;
}
void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfoList &Inputs) const {
const Driver &D = getToolChain().getDriver();
const toolchains::MachO &MachOTC = getMachOToolChain();
unsigned Version[5] = {0, 0, 0, 0, 0};
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
if (!Driver::GetReleaseVersion(A->getValue(), Version))
D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
}
// Newer linkers support -demangle. Pass it if supported and not disabled by
// the user.
if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
CmdArgs.push_back("-demangle");
if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137)
CmdArgs.push_back("-export_dynamic");
// If we are using App Extension restrictions, pass a flag to the linker
// telling it that the compiled code has been audited.
if (Args.hasFlag(options::OPT_fapplication_extension,
options::OPT_fno_application_extension, false))
CmdArgs.push_back("-application_extension");
if (D.isUsingLTO()) {
// If we are using LTO, then automatically create a temporary file path for
// the linker to use, so that its lifetime will extend past a possible
// dsymutil step.
if (Version[0] >= 116 && NeedsTempPath(Inputs)) {
const char *TmpPath = C.getArgs().MakeArgString(
D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
C.addTempFile(TmpPath);
CmdArgs.push_back("-object_path_lto");
CmdArgs.push_back(TmpPath);
}
}
// Use the -lto_library option to specify the libLTO.dylib path. Try to find
// it in clang installed libraries. ld64 will only look at this argument
// when it actually uses LTO, so libLTO.dylib only needs to exist at link
// time if ld64 decides that it needs to use LTO.
// Since this is passed unconditionally, ld64 will never look for libLTO.dylib
// next to it. That's ok since ld64 using a libLTO.dylib not matching the
// clang version won't work anyway.
if (Version[0] >= 133) {
// Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
StringRef P = llvm::sys::path::parent_path(D.Dir);
SmallString<128> LibLTOPath(P);
llvm::sys::path::append(LibLTOPath, "lib");
llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
CmdArgs.push_back("-lto_library");
CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
}
// ld64 version 262 and above run the deduplicate pass by default.
if (Version[0] >= 262 && shouldLinkerNotDedup(C.getJobs().empty(), Args))
CmdArgs.push_back("-no_deduplicate");
// Derived from the "link" spec.
Args.AddAllArgs(CmdArgs, options::OPT_static);
if (!Args.hasArg(options::OPT_static))
CmdArgs.push_back("-dynamic");
if (Args.hasArg(options::OPT_fgnu_runtime)) {
// FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu
// here. How do we wish to handle such things?
}
if (!Args.hasArg(options::OPT_dynamiclib)) {
AddMachOArch(Args, CmdArgs);
// FIXME: Why do this only on this path?
Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL);
Args.AddLastArg(CmdArgs, options::OPT_bundle);
Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader);
Args.AddAllArgs(CmdArgs, options::OPT_client__name);
Arg *A;
if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
(A = Args.getLastArg(options::OPT_current__version)) ||
(A = Args.getLastArg(options::OPT_install__name)))
D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
<< "-dynamiclib";
Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
Args.AddLastArg(CmdArgs, options::OPT_private__bundle);
} else {
CmdArgs.push_back("-dylib");
Arg *A;
if ((A = Args.getLastArg(options::OPT_bundle)) ||
(A = Args.getLastArg(options::OPT_bundle__loader)) ||
(A = Args.getLastArg(options::OPT_client__name)) ||
(A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
(A = Args.getLastArg(options::OPT_keep__private__externs)) ||
(A = Args.getLastArg(options::OPT_private__bundle)))
D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
<< "-dynamiclib";
Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
"-dylib_compatibility_version");
Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version,
"-dylib_current_version");
AddMachOArch(Args, CmdArgs);
Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name,
"-dylib_install_name");
}
Args.AddLastArg(CmdArgs, options::OPT_all__load);
Args.AddAllArgs(CmdArgs, options::OPT_allowable__client);
Args.AddLastArg(CmdArgs, options::OPT_bind__at__load);
if (MachOTC.isTargetIOSBased())
Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal);
Args.AddLastArg(CmdArgs, options::OPT_dead__strip);
Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms);
Args.AddAllArgs(CmdArgs, options::OPT_dylib__file);
Args.AddLastArg(CmdArgs, options::OPT_dynamic);
Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list);
Args.AddLastArg(CmdArgs, options::OPT_flat__namespace);
Args.AddAllArgs(CmdArgs, options::OPT_force__load);
Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names);
Args.AddAllArgs(CmdArgs, options::OPT_image__base);
Args.AddAllArgs(CmdArgs, options::OPT_init);
// Add the deployment target.
MachOTC.addMinVersionArgs(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_nomultidefs);
Args.AddLastArg(CmdArgs, options::OPT_multi__module);
Args.AddLastArg(CmdArgs, options::OPT_single__module);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);
if (const Arg *A =
Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
options::OPT_fno_pie, options::OPT_fno_PIE)) {
if (A->getOption().matches(options::OPT_fpie) ||
A->getOption().matches(options::OPT_fPIE))
CmdArgs.push_back("-pie");
else
CmdArgs.push_back("-no_pie");
}
// For embed-bitcode, use -bitcode_bundle in the linker command.
if (C.getDriver().embedBitcodeEnabled()) {
// Check if the toolchain supports bitcode build flow.
if (MachOTC.SupportsEmbeddedBitcode()) {
CmdArgs.push_back("-bitcode_bundle");
if (C.getDriver().embedBitcodeMarkerOnly() && Version[0] >= 278) {
CmdArgs.push_back("-bitcode_process_mode");
CmdArgs.push_back("marker");
}
} else
D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain);
}
Args.AddLastArg(CmdArgs, options::OPT_prebind);
Args.AddLastArg(CmdArgs, options::OPT_noprebind);
Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding);
Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules);
Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs);
Args.AddAllArgs(CmdArgs, options::OPT_sectcreate);
Args.AddAllArgs(CmdArgs, options::OPT_sectorder);
Args.AddAllArgs(CmdArgs, options::OPT_seg1addr);
Args.AddAllArgs(CmdArgs, options::OPT_segprot);
Args.AddAllArgs(CmdArgs, options::OPT_segaddr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename);
Args.AddAllArgs(CmdArgs, options::OPT_sub__library);
Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella);
// Give --sysroot= preference over the Apple-specific behavior of also using
// --isysroot as the syslibroot.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
} else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace);
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints);
Args.AddAllArgs(CmdArgs, options::OPT_umbrella);
Args.AddAllArgs(CmdArgs, options::OPT_undefined);
Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list);
Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches);
Args.AddLastArg(CmdArgs, options::OPT_X_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_y);
Args.AddLastArg(CmdArgs, options::OPT_w);
Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__);
Args.AddLastArg(CmdArgs, options::OPT_seglinkedit);
Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit);
Args.AddAllArgs(CmdArgs, options::OPT_sectalign);
Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols);
Args.AddAllArgs(CmdArgs, options::OPT_segcreate);
Args.AddLastArg(CmdArgs, options::OPT_whyload);
Args.AddLastArg(CmdArgs, options::OPT_whatsloaded);
Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name);
Args.AddLastArg(CmdArgs, options::OPT_dylinker);
Args.AddLastArg(CmdArgs, options::OPT_Mach);
}
/// \brief Determine whether we are linking the ObjC runtime.
static bool isObjCRuntimeLinked(const ArgList &Args) {
if (isObjCAutoRefCount(Args)) {
Args.ClaimAllArgs(options::OPT_fobjc_link_runtime);
return true;
}
return Args.hasArg(options::OPT_fobjc_link_runtime);
}
void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
assert(Output.getType() == types::TY_Image && "Invalid linker output type.");
// If the number of arguments surpasses the system limits, we will encode the
// input files in a separate file, shortening the command line. To this end,
// build a list of input file names that can be passed via a file with the
// -filelist linker option.
llvm::opt::ArgStringList InputFileList;
// The logic here is derived from gcc's behavior, most of which
// comes from specs (starting with link_command). Consult gcc for
// more information.
ArgStringList CmdArgs;
/// Hack(tm) to ignore linking errors when we are doing ARC migration.
if (Args.hasArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_migrate)) {
for (const auto &Arg : Args)
Arg->claim();
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("touch"));
CmdArgs.push_back(Output.getFilename());
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
return;
}
// I'm not sure why this particular decomposition exists in gcc, but
// we follow suit for ease of comparison.
AddLinkArgs(C, Args, CmdArgs, Inputs);
// For LTO, pass the name of the optimization record file.
if (Args.hasFlag(options::OPT_fsave_optimization_record,
options::OPT_fno_save_optimization_record, false)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-lto-pass-remarks-output");
CmdArgs.push_back("-mllvm");
SmallString<128> F;
F = Output.getFilename();
F += ".opt.yaml";
CmdArgs.push_back(Args.MakeArgString(F));
if (getLastProfileUseArg(Args)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-lto-pass-remarks-with-hotness");
}
}
// It seems that the 'e' option is completely ignored for dynamic executables
// (the default), and with static executables, the last one wins, as expected.
Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s, options::OPT_t,
options::OPT_Z_Flag, options::OPT_u_Group,
options::OPT_e, options::OPT_r});
// Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
// members of static archive libraries which implement Objective-C classes or
// categories.
if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
CmdArgs.push_back("-ObjC");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);
// SafeStack requires its own runtime libraries.
// These libraries should be linked first, to make sure the
// __safestack_init constructor executes before everything else.
if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
"libclang_rt.safestack_osx.a",
/*AlwaysLink=*/true);
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
// Build the input file for -filelist (list of linker input files) in case we
// need it later.
for (const auto &II : Inputs) {
if (!II.isFilename()) {
// This is a linker input argument.
// We cannot mix input arguments and file names in a -filelist input, thus
// we prematurely stop our list (remaining files shall be passed as
// arguments).
if (InputFileList.size() > 0)
break;
continue;
}
InputFileList.push_back(II.getFilename());
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
addOpenMPRuntime(CmdArgs, getToolChain(), Args);
if (isObjCRuntimeLinked(Args) &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
// We use arclite library for both ARC and subscripting support.
getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);
CmdArgs.push_back("-framework");
CmdArgs.push_back("Foundation");
// Link libobjc.
CmdArgs.push_back("-lobjc");
}
if (LinkingOutput) {
CmdArgs.push_back("-arch_multiple");
CmdArgs.push_back("-final_output");
CmdArgs.push_back(LinkingOutput);
}
if (Args.hasArg(options::OPT_fnested_functions))
CmdArgs.push_back("-allow_stack_execute");
getMachOToolChain().addProfileRTLibs(Args, CmdArgs);
if (unsigned Parallelism =
getLTOParallelism(Args, getToolChain().getDriver())) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(
Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism)));
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
if (getToolChain().getDriver().CCCIsCXX())
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
// link_ssp spec is empty.
// Let the tool chain choose which runtime library to link.
getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs);
// No need to do anything for pthreads. Claim argument to avoid warning.
Args.ClaimAllArgs(options::OPT_pthread);
Args.ClaimAllArgs(options::OPT_pthreads);
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
// endfile_spec is empty.
}
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_F);
// -iframework should be forwarded as -F.
for (const Arg *A : Args.filtered(options::OPT_iframework))
CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue()));
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
if (A->getValue() == StringRef("Accelerate")) {
CmdArgs.push_back("-framework");
CmdArgs.push_back("Accelerate");
}
}
}
const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
std::unique_ptr<Command> Cmd =
llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs);
Cmd->setInputFileList(std::move(InputFileList));
C.addCommand(std::move(Cmd));
}
void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-create");
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-output");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs) {
assert(II.isFilename() && "Unexpected lipo input.");
CmdArgs.push_back(II.getFilename());
}
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected dsymutil input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("--verify");
CmdArgs.push_back("--debug-info");
CmdArgs.push_back("--eh-frame");
CmdArgs.push_back("--quiet");
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected verify input");
// Grabbing the output of the earlier dsymutil run.
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: ToolChain(D, Triple, Args) {
// We expect 'as', 'ld', etc. to be adjacent to our install dir.
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: MachO(D, Triple, Args), TargetInitialized(false),
CudaInstallation(D, Triple, Args) {}
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
types::ID Ty = types::lookupTypeForExtension(Ext);
// Darwin always preprocesses assembly files (unless -x is used explicitly).
if (Ty == types::TY_PP_Asm)
return types::TY_Asm;
return Ty;
}
bool MachO::HasNativeLLVMSupport() const { return true; }
ToolChain::CXXStdlibType Darwin::GetDefaultCXXStdlibType() const {
// Default to use libc++ on OS X 10.9+ and iOS 7+.
if ((isTargetMacOS() && !isMacosxVersionLT(10, 9)) ||
(isTargetIOSBased() && !isIPhoneOSVersionLT(7, 0)) ||
isTargetWatchOSBased())
return ToolChain::CST_Libcxx;
return ToolChain::CST_Libstdcxx;
}
/// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0.
ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const {
if (isTargetWatchOSBased())
return ObjCRuntime(ObjCRuntime::WatchOS, TargetVersion);
if (isTargetIOSBased())
return ObjCRuntime(ObjCRuntime::iOS, TargetVersion);
if (isNonFragile)
return ObjCRuntime(ObjCRuntime::MacOSX, TargetVersion);
return ObjCRuntime(ObjCRuntime::FragileMacOSX, TargetVersion);
}
/// Darwin provides a blocks runtime starting in MacOS X 10.6 and iOS 3.2.
bool Darwin::hasBlocksRuntime() const {
if (isTargetWatchOSBased())
return true;
else if (isTargetIOSBased())
return !isIPhoneOSVersionLT(3, 2);
else {
assert(isTargetMacOS() && "unexpected darwin target");
return !isMacosxVersionLT(10, 6);
}
}
void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
// This is just a MachO name translation routine and there's no
// way to join this into ARMTargetParser without breaking all
// other assumptions. Maybe MachO should consider standardising
// their nomenclature.
static const char *ArmMachOArchName(StringRef Arch) {
return llvm::StringSwitch<const char *>(Arch)
.Case("armv6k", "armv6")
.Case("armv6m", "armv6m")
.Case("armv5tej", "armv5")
.Case("xscale", "xscale")
.Case("armv4t", "armv4t")
.Case("armv7", "armv7")
.Cases("armv7a", "armv7-a", "armv7")
.Cases("armv7r", "armv7-r", "armv7")
.Cases("armv7em", "armv7e-m", "armv7em")
.Cases("armv7k", "armv7-k", "armv7k")
.Cases("armv7m", "armv7-m", "armv7m")
.Cases("armv7s", "armv7-s", "armv7s")
.Default(nullptr);
}
static const char *ArmMachOArchNameCPU(StringRef CPU) {
unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
if (ArchKind == llvm::ARM::AK_INVALID)
return nullptr;
StringRef Arch = llvm::ARM::getArchName(ArchKind);
// FIXME: Make sure this MachO triple mangling is really necessary.
// ARMv5* normalises to ARMv5.
if (Arch.startswith("armv5"))
Arch = Arch.substr(0, 5);
// ARMv6*, except ARMv6M, normalises to ARMv6.
else if (Arch.startswith("armv6") && !Arch.endswith("6m"))
Arch = Arch.substr(0, 5);
// ARMv7A normalises to ARMv7.
else if (Arch.endswith("v7a"))
Arch = Arch.substr(0, 5);
return Arch.data();
}
StringRef MachO::getMachOArchName(const ArgList &Args) const {
switch (getTriple().getArch()) {
default:
return getDefaultUniversalArchName();
case llvm::Triple::aarch64:
return "arm64";
case llvm::Triple::thumb:
case llvm::Triple::arm:
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ))
if (const char *Arch = ArmMachOArchName(A->getValue()))
return Arch;
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
if (const char *Arch = ArmMachOArchNameCPU(A->getValue()))
return Arch;
return "arm";
}
}
Darwin::~Darwin() {}
MachO::~MachO() {}
std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform), return
// the default triple.
if (!isTargetInitialized())
return Triple.getTriple();
SmallString<16> Str;
if (isTargetWatchOSBased())
Str += "watchos";
else if (isTargetTvOSBased())
Str += "tvos";
else if (isTargetIOSBased())
Str += "ios";
else
Str += "macosx";
Str += getTargetVersion().getAsString();
Triple.setOSName(Str);
return Triple.getTriple();
}
Tool *MachO::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::LipoJobClass:
if (!Lipo)
Lipo.reset(new tools::darwin::Lipo(*this));
return Lipo.get();
case Action::DsymutilJobClass:
if (!Dsymutil)
Dsymutil.reset(new tools::darwin::Dsymutil(*this));
return Dsymutil.get();
case Action::VerifyDebugInfoJobClass:
if (!VerifyDebug)
VerifyDebug.reset(new tools::darwin::VerifyDebug(*this));
return VerifyDebug.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *MachO::buildLinker() const { return new tools::darwin::Linker(*this); }
Tool *MachO::buildAssembler() const {
return new tools::darwin::Assembler(*this);
}
DarwinClang::DarwinClang(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Darwin(D, Triple, Args) {}
void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
// For modern targets, promote certain warnings to errors.
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
CC1Args.push_back("-Wdeprecated-objc-isa-usage");
CC1Args.push_back("-Werror=deprecated-objc-isa-usage");
// For iOS and watchOS, also error about implicit function declarations,
// as that can impact calling conventions.
if (!isTargetMacOS())
CC1Args.push_back("-Werror=implicit-function-declaration");
}
}
void DarwinClang::AddLinkARCArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Avoid linking compatibility stubs on i386 mac.
if (isTargetMacOS() && getArch() == llvm::Triple::x86)
return;
ObjCRuntime runtime = getDefaultObjCRuntime(/*nonfragile*/ true);
if ((runtime.hasNativeARC() || !isObjCAutoRefCount(Args)) &&
runtime.hasSubscripting())
return;
CmdArgs.push_back("-force_load");
SmallString<128> P(getDriver().ClangExecutable);
llvm::sys::path::remove_filename(P); // 'clang'
llvm::sys::path::remove_filename(P); // 'bin'
llvm::sys::path::append(P, "lib", "arc", "libarclite_");
// Mash in the platform.
if (isTargetWatchOSSimulator())
P += "watchsimulator";
else if (isTargetWatchOS())
P += "watchos";
else if (isTargetTvOSSimulator())
P += "appletvsimulator";
else if (isTargetTvOS())
P += "appletvos";
else if (isTargetIOSSimulator())
P += "iphonesimulator";
else if (isTargetIPhoneOS())
P += "iphoneos";
else
P += "macosx";
P += ".a";
CmdArgs.push_back(Args.MakeArgString(P));
}
unsigned DarwinClang::GetDefaultDwarfVersion() const {
// Default to use DWARF 2 on OS X 10.10 / iOS 8 and lower.
if ((isTargetMacOS() && isMacosxVersionLT(10, 11)) ||
(isTargetIOSBased() && isIPhoneOSVersionLT(9)))
return 2;
return 4;
}
void MachO::AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs,
StringRef DarwinLibName, bool AlwaysLink,
bool IsEmbedded, bool AddRPath) const {
SmallString<128> Dir(getDriver().ResourceDir);
llvm::sys::path::append(Dir, "lib", IsEmbedded ? "macho_embedded" : "darwin");
SmallString<128> P(Dir);
llvm::sys::path::append(P, DarwinLibName);
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build (unless
// we explicitly force linking with this library).
if (AlwaysLink || getVFS().exists(P))
CmdArgs.push_back(Args.MakeArgString(P));
// Adding the rpaths might negatively interact when other rpaths are involved,
// so we should make sure we add the rpaths last, after all user-specified
// rpaths. This is currently true from this place, but we need to be
// careful if this function is ever called before user's rpaths are emitted.
if (AddRPath) {
assert(DarwinLibName.endswith(".dylib") && "must be a dynamic library");
// Add @executable_path to rpath to support having the dylib copied with
// the executable.
CmdArgs.push_back("-rpath");
CmdArgs.push_back("@executable_path");
// Add the path to the resource dir to rpath to support using the dylib
// from the default location without copying.
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(Dir));
}
}
void MachO::AddFuzzerLinkArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
// Go up one directory from Clang to find the libfuzzer archive file.
StringRef ParentDir = llvm::sys::path::parent_path(getDriver().InstalledDir);
SmallString<128> P(ParentDir);
llvm::sys::path::append(P, "lib", "libLLVMFuzzer.a");
CmdArgs.push_back(Args.MakeArgString(P));
// Libfuzzer is written in C++ and requires libcxx.
AddCXXStdlibLibArgs(Args, CmdArgs);
}
StringRef Darwin::getPlatformFamily() const {
switch (TargetPlatform) {
case DarwinPlatformKind::MacOS:
return "MacOSX";
case DarwinPlatformKind::IPhoneOS:
case DarwinPlatformKind::IPhoneOSSimulator:
return "iPhone";
case DarwinPlatformKind::TvOS:
case DarwinPlatformKind::TvOSSimulator:
return "AppleTV";
case DarwinPlatformKind::WatchOS:
case DarwinPlatformKind::WatchOSSimulator:
return "Watch";
}
llvm_unreachable("Unsupported platform");
}
StringRef Darwin::getSDKName(StringRef isysroot) {
// Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk
llvm::sys::path::const_iterator SDKDir;
auto BeginSDK = llvm::sys::path::begin(isysroot);
auto EndSDK = llvm::sys::path::end(isysroot);
for (auto IT = BeginSDK; IT != EndSDK; ++IT) {
StringRef SDK = *IT;
if (SDK.endswith(".sdk"))
return SDK.slice(0, SDK.size() - 4);
}
return "";
}
StringRef Darwin::getOSLibraryNameSuffix() const {
switch(TargetPlatform) {
case DarwinPlatformKind::MacOS:
return "osx";
case DarwinPlatformKind::IPhoneOS:
return "ios";
case DarwinPlatformKind::IPhoneOSSimulator:
return "iossim";
case DarwinPlatformKind::TvOS:
return "tvos";
case DarwinPlatformKind::TvOSSimulator:
return "tvossim";
case DarwinPlatformKind::WatchOS:
return "watchos";
case DarwinPlatformKind::WatchOSSimulator:
return "watchossim";
}
llvm_unreachable("Unsupported platform");
}
void Darwin::addProfileRTLibs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (!needsProfileRT(Args)) return;
AddLinkRuntimeLib(Args, CmdArgs, (Twine("libclang_rt.profile_") +
getOSLibraryNameSuffix() + ".a").str(),
/*AlwaysLink*/ true);
}
void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) const {
AddLinkRuntimeLib(
Args, CmdArgs,
(Twine("libclang_rt.") + Sanitizer + "_" +
getOSLibraryNameSuffix() + "_dynamic.dylib").str(),
/*AlwaysLink*/ true, /*IsEmbedded*/ false,
/*AddRPath*/ true);
}
ToolChain::RuntimeLibType DarwinClang::GetRuntimeLibType(
const ArgList &Args) const {
if (Arg* A = Args.getLastArg(options::OPT_rtlib_EQ)) {
StringRef Value = A->getValue();
if (Value != "compiler-rt")
getDriver().Diag(clang::diag::err_drv_unsupported_rtlib_for_platform)
<< Value << "darwin";
}
return ToolChain::RLT_CompilerRT;
}
void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Call once to ensure the diagnostic is printed if a wrong value was specified.
GetRuntimeLibType(Args);
// Darwin doesn't support real static executables, don't link any runtime
// libraries with -static.
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_fapple_kext) ||
Args.hasArg(options::OPT_mkernel))
return;
// Reject -static-libgcc for now, we can deal with this when and if someone
// cares. This is useful in situations where someone wants to statically link
// something like libstdc++, and needs its runtime support routines.
if (const Arg *A = Args.getLastArg(options::OPT_static_libgcc)) {
getDriver().Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
return;
}
const SanitizerArgs &Sanitize = getSanitizerArgs();
if (Sanitize.needsAsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
if (Sanitize.needsLsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "lsan");
if (Sanitize.needsUbsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
if (Sanitize.needsTsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan");
if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib))
AddFuzzerLinkArgs(Args, CmdArgs);
if (Sanitize.needsStatsRt()) {
StringRef OS = isTargetMacOS() ? "osx" : "iossim";
AddLinkRuntimeLib(Args, CmdArgs,
(Twine("libclang_rt.stats_client_") + OS + ".a").str(),
/*AlwaysLink=*/true);
AddLinkSanitizerLibArgs(Args, CmdArgs, "stats");
}
if (Sanitize.needsEsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "esan");
// Otherwise link libSystem, then the dynamic runtime library, and finally any
// target specific static runtime library.
CmdArgs.push_back("-lSystem");
// Select the dynamic runtime library and the target specific static library.
if (isTargetWatchOSBased()) {
// We currently always need a static runtime library for watchOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.watchos.a");
} else if (isTargetTvOSBased()) {
// We currently always need a static runtime library for tvOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.tvos.a");
} else if (isTargetIOSBased()) {
// If we are compiling for the iOS simulator, don't attempt to link
// libgcc_s.1; it never went into the SDK.
// Linking against libgcc_s.1 isn't needed for iOS 5.0+
if (isIPhoneOSVersionLT(5, 0) && !isTargetIOSSimulator() &&
getTriple().getArch() != llvm::Triple::aarch64)
CmdArgs.push_back("-lgcc_s.1");
// We currently always need a static runtime library for iOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.ios.a");
} else {
assert(isTargetMacOS() && "unexpected non MacOS platform");
// The dynamic runtime library was merged with libSystem for 10.6 and
// beyond; only 10.4 and 10.5 need an additional runtime library.
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lgcc_s.10.4");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lgcc_s.10.5");
// Originally for OS X, we thought we would only need a static runtime
// library when targeting 10.4, to provide versions of the static functions
// which were omitted from 10.4.dylib. This led to the creation of the 10.4
// builtins library.
//
// Unfortunately, that turned out to not be true, because Darwin system
// headers can still use eprintf on i386, and it is not exported from
// libSystem. Therefore, we still must provide a runtime library just for
// the tiny tiny handful of projects that *might* use that symbol.
//
// Then over time, we figured out it was useful to add more things to the
// runtime, so we created libclang_rt.osx.a to provide new functions when
// deploying to old OS builds, and for a long time we had both the eprintf and
// osx builtin libraries, which just seems excessive. So with PR 28855, we
// are removing the eprintf library and expecting eprintf to be provided by
// the OS X builtins library.
if (isMacosxVersionLT(10, 5))
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.10.4.a");
else
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.osx.a");
}
}
/// Returns the most appropriate macOS target version for the current process.
///
/// If the macOS SDK version is the same or earlier than the system version,
/// then the SDK version is returned. Otherwise the system version is returned.
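/// For example, building with a macOS 10.13 SDK on a 10.12 host yields the
/// system's 10.12 version, while a 10.12 SDK on a 10.13 host yields the SDK's
/// "10.12".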
static std::string getSystemOrSDKMacOSVersion(StringRef MacOSSDKVersion) {
unsigned Major, Minor, Micro;
llvm::Triple SystemTriple(llvm::sys::getProcessTriple());
if (!SystemTriple.isMacOSX())
return MacOSSDKVersion;
SystemTriple.getMacOSXVersion(Major, Minor, Micro);
VersionTuple SystemVersion(Major, Minor, Micro);
bool HadExtra;
if (!Driver::GetReleaseVersion(MacOSSDKVersion, Major, Minor, Micro,
HadExtra))
return MacOSSDKVersion;
VersionTuple SDKVersion(Major, Minor, Micro);
if (SDKVersion > SystemVersion)
return SystemVersion.getAsString();
return MacOSSDKVersion;
}
void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
const OptTable &Opts = getDriver().getOpts();
// Support allowing the SDKROOT environment variable used by xcrun and other
// Xcode tools to define the default sysroot, by making it the default for
// isysroot.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
// Warn if the path does not exist.
if (!getVFS().exists(A->getValue()))
getDriver().Diag(clang::diag::warn_missing_sysroot) << A->getValue();
} else {
if (char *env = ::getenv("SDKROOT")) {
// We only use this value as the default if it is an absolute path, exists,
// and is not the root path.
if (llvm::sys::path::is_absolute(env) && getVFS().exists(env) &&
StringRef(env) != "/") {
Args.append(Args.MakeSeparateArg(
nullptr, Opts.getOption(options::OPT_isysroot), env));
}
}
}
Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ);
Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ,
options::OPT_mios_simulator_version_min_EQ);
Arg *TvOSVersion =
Args.getLastArg(options::OPT_mtvos_version_min_EQ,
options::OPT_mtvos_simulator_version_min_EQ);
Arg *WatchOSVersion =
Args.getLastArg(options::OPT_mwatchos_version_min_EQ,
options::OPT_mwatchos_simulator_version_min_EQ);
unsigned Major, Minor, Micro;
bool HadExtra;
// The iOS deployment target that is explicitly specified via a command line
// option or an environment variable.
std::string ExplicitIOSDeploymentTargetStr;
if (iOSVersion)
ExplicitIOSDeploymentTargetStr = iOSVersion->getAsString(Args);
// Add a macro to differentiate between -m(iphone|tv|watch)os-version-min=X.Y
// and -m(iphone|tv|watch)simulator-version-min=X.Y.
if (Args.hasArg(options::OPT_mios_simulator_version_min_EQ) ||
Args.hasArg(options::OPT_mtvos_simulator_version_min_EQ) ||
Args.hasArg(options::OPT_mwatchos_simulator_version_min_EQ))
Args.append(Args.MakeSeparateArg(nullptr, Opts.getOption(options::OPT_D),
" __APPLE_EMBEDDED_SIMULATOR__=1"));
if (OSXVersion && (iOSVersion || TvOSVersion || WatchOSVersion)) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< OSXVersion->getAsString(Args)
<< (iOSVersion ? iOSVersion :
TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
iOSVersion = TvOSVersion = WatchOSVersion = nullptr;
} else if (iOSVersion && (TvOSVersion || WatchOSVersion)) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< iOSVersion->getAsString(Args)
<< (TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
TvOSVersion = WatchOSVersion = nullptr;
} else if (TvOSVersion && WatchOSVersion) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< TvOSVersion->getAsString(Args)
<< WatchOSVersion->getAsString(Args);
WatchOSVersion = nullptr;
} else if (!OSXVersion && !iOSVersion && !TvOSVersion && !WatchOSVersion) {
// If no deployment target was specified on the command line, check for
// environment defines.
std::string OSXTarget;
std::string iOSTarget;
std::string TvOSTarget;
std::string WatchOSTarget;
if (char *env = ::getenv("MACOSX_DEPLOYMENT_TARGET"))
OSXTarget = env;
if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
iOSTarget = env;
if (char *env = ::getenv("TVOS_DEPLOYMENT_TARGET"))
TvOSTarget = env;
if (char *env = ::getenv("WATCHOS_DEPLOYMENT_TARGET"))
WatchOSTarget = env;
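// For illustration: MACOSX_DEPLOYMENT_TARGET=10.12 in the environment ends up
// behaving like an explicit -mmacosx-version-min=10.12 (see the synthesized
// arguments appended below).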
if (!iOSTarget.empty())
ExplicitIOSDeploymentTargetStr =
std::string("IPHONEOS_DEPLOYMENT_TARGET=") + iOSTarget;
// If there is no command-line argument to specify the Target version and
// no environment variable defined, see if we can set the default based
// on -isysroot.
if (OSXTarget.empty() && iOSTarget.empty() && WatchOSTarget.empty() &&
TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) {
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef isysroot = A->getValue();
StringRef SDK = getSDKName(isysroot);
if (SDK.size() > 0) {
// Slice the version number out.
// The version number spans from the first digit to the last digit in the
// SDK name.
size_t StartVer = SDK.find_first_of("0123456789");
size_t EndVer = SDK.find_last_of("0123456789");
if (StartVer != StringRef::npos && EndVer > StartVer) {
StringRef Version = SDK.slice(StartVer, EndVer + 1);
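// e.g. an SDK name of "MacOSX10.12.sdk" yields Version == "10.12", and
// "iPhoneOS11.0.sdk" yields "11.0".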
if (SDK.startswith("iPhoneOS") ||
SDK.startswith("iPhoneSimulator"))
iOSTarget = Version;
else if (SDK.startswith("MacOSX"))
OSXTarget = getSystemOrSDKMacOSVersion(Version);
else if (SDK.startswith("WatchOS") ||
SDK.startswith("WatchSimulator"))
WatchOSTarget = Version;
else if (SDK.startswith("AppleTVOS") ||
SDK.startswith("AppleTVSimulator"))
TvOSTarget = Version;
}
}
}
}
// If no OS targets have been specified, try to guess platform from -target
// or arch name and compute the version from the triple.
if (OSXTarget.empty() && iOSTarget.empty() && TvOSTarget.empty() &&
WatchOSTarget.empty()) {
llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS;
// Set the OSTy based on -target if -arch isn't present.
if (Args.hasArg(options::OPT_target) && !Args.hasArg(options::OPT_arch)) {
OSTy = getTriple().getOS();
} else {
StringRef MachOArchName = getMachOArchName(Args);
if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
MachOArchName == "arm64")
OSTy = llvm::Triple::IOS;
else if (MachOArchName == "armv7k")
OSTy = llvm::Triple::WatchOS;
else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
MachOArchName != "armv7em")
OSTy = llvm::Triple::MacOSX;
}
if (OSTy != llvm::Triple::UnknownOS) {
unsigned Major, Minor, Micro;
std::string *OSTarget;
switch (OSTy) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
if (!getTriple().getMacOSXVersion(Major, Minor, Micro))
getDriver().Diag(diag::err_drv_invalid_darwin_version)
<< getTriple().getOSName();
OSTarget = &OSXTarget;
break;
case llvm::Triple::IOS:
getTriple().getiOSVersion(Major, Minor, Micro);
OSTarget = &iOSTarget;
break;
case llvm::Triple::TvOS:
getTriple().getOSVersion(Major, Minor, Micro);
OSTarget = &TvOSTarget;
break;
case llvm::Triple::WatchOS:
getTriple().getWatchOSVersion(Major, Minor, Micro);
OSTarget = &WatchOSTarget;
break;
default:
llvm_unreachable("Unexpected OS type");
break;
}
llvm::raw_string_ostream(*OSTarget) << Major << '.' << Minor << '.'
<< Micro;
}
}
// Do not allow conflicts with the watchOS target.
if (!WatchOSTarget.empty() && (!iOSTarget.empty() || !TvOSTarget.empty())) {
getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
<< "WATCHOS_DEPLOYMENT_TARGET"
<< (!iOSTarget.empty() ? "IPHONEOS_DEPLOYMENT_TARGET" :
"TVOS_DEPLOYMENT_TARGET");
}
// Do not allow conflicts with the tvOS target.
if (!TvOSTarget.empty() && !iOSTarget.empty()) {
getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
<< "TVOS_DEPLOYMENT_TARGET"
<< "IPHONEOS_DEPLOYMENT_TARGET";
}
// Allow conflicts among OSX and iOS for historical reasons, but choose the
// default platform.
if (!OSXTarget.empty() && (!iOSTarget.empty() ||
!WatchOSTarget.empty() ||
!TvOSTarget.empty())) {
if (getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::thumb)
OSXTarget = "";
else
iOSTarget = WatchOSTarget = TvOSTarget = "";
}
if (!OSXTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
OSXVersion = Args.MakeJoinedArg(nullptr, O, OSXTarget);
Args.append(OSXVersion);
} else if (!iOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_miphoneos_version_min_EQ);
iOSVersion = Args.MakeJoinedArg(nullptr, O, iOSTarget);
Args.append(iOSVersion);
} else if (!TvOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mtvos_version_min_EQ);
TvOSVersion = Args.MakeJoinedArg(nullptr, O, TvOSTarget);
Args.append(TvOSVersion);
} else if (!WatchOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mwatchos_version_min_EQ);
WatchOSVersion = Args.MakeJoinedArg(nullptr, O, WatchOSTarget);
Args.append(WatchOSVersion);
}
}
DarwinPlatformKind Platform;
if (OSXVersion)
Platform = MacOS;
else if (iOSVersion)
Platform = IPhoneOS;
else if (TvOSVersion)
Platform = TvOS;
else if (WatchOSVersion)
Platform = WatchOS;
else
llvm_unreachable("Unable to infer Darwin variant");
// Set the tool chain target information.
if (Platform == MacOS) {
assert((!iOSVersion && !TvOSVersion && !WatchOSVersion) &&
"Unknown target platform!");
if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor, Micro,
HadExtra) ||
HadExtra || Major != 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< OSXVersion->getAsString(Args);
} else if (Platform == IPhoneOS) {
assert(iOSVersion && "Unknown target platform!");
if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor, Micro,
HadExtra) ||
HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< iOSVersion->getAsString(Args);
// For 32-bit targets, the deployment target for iOS has to be earlier than
// iOS 11.
if (getTriple().isArch32Bit() && Major >= 11) {
// If the deployment target is explicitly specified, print a diagnostic.
if (!ExplicitIOSDeploymentTargetStr.empty()) {
getDriver().Diag(diag::warn_invalid_ios_deployment_target)
<< ExplicitIOSDeploymentTargetStr;
// Otherwise, set it to 10.99.99.
} else {
Major = 10;
Minor = 99;
Micro = 99;
}
}
} else if (Platform == TvOS) {
if (!Driver::GetReleaseVersion(TvOSVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< TvOSVersion->getAsString(Args);
} else if (Platform == WatchOS) {
if (!Driver::GetReleaseVersion(WatchOSVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major >= 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< WatchOSVersion->getAsString(Args);
} else
llvm_unreachable("unknown kind of Darwin platform");
// Recognize iOS targets with an x86 architecture as the iOS simulator.
if (iOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = IPhoneOSSimulator;
if (TvOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = TvOSSimulator;
if (WatchOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = WatchOSSimulator;
setTarget(Platform, Major, Minor, Micro);
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef SDK = getSDKName(A->getValue());
if (SDK.size() > 0) {
size_t StartVer = SDK.find_first_of("0123456789");
StringRef SDKName = SDK.slice(0, StartVer);
if (!SDKName.startswith(getPlatformFamily()))
getDriver().Diag(diag::warn_incompatible_sysroot)
<< SDKName << getPlatformFamily();
}
}
}
void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CXXStdlibType Type = GetCXXStdlibType(Args);
switch (Type) {
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
break;
case ToolChain::CST_Libstdcxx:
// Unfortunately, -lstdc++ doesn't always exist in the standard search path;
// it was previously found in the gcc lib dir. However, for all the Darwin
// platforms we care about it was -lstdc++.6, so we search for that
// explicitly if we can't see an obvious -lstdc++ candidate.
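// For illustration: given -isysroot /SDK, we try /SDK/usr/lib/libstdc++.dylib
// first and, failing that, /SDK/usr/lib/libstdc++.6.dylib, before falling
// back to a plain -lstdc++ for the linker to resolve.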
// Check in the sysroot first.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
SmallString<128> P(A->getValue());
llvm::sys::path::append(P, "usr", "lib", "libstdc++.dylib");
if (!getVFS().exists(P)) {
llvm::sys::path::remove_filename(P);
llvm::sys::path::append(P, "libstdc++.6.dylib");
if (getVFS().exists(P)) {
CmdArgs.push_back(Args.MakeArgString(P));
return;
}
}
}
// Otherwise, look in the root.
// FIXME: This should be removed someday when we don't have to care about
// 10.6 and earlier, where /usr/lib/libstdc++.dylib does not exist.
if (!getVFS().exists("/usr/lib/libstdc++.dylib") &&
getVFS().exists("/usr/lib/libstdc++.6.dylib")) {
CmdArgs.push_back("/usr/lib/libstdc++.6.dylib");
return;
}
// Otherwise, let the linker search.
CmdArgs.push_back("-lstdc++");
break;
}
}
void DarwinClang::AddCCKextLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// For Darwin platforms, use the compiler-rt-based support library
// instead of the gcc-provided one (which, incidentally, is only present
// in the gcc lib dir, making it hard to find).
SmallString<128> P(getDriver().ResourceDir);
llvm::sys::path::append(P, "lib", "darwin");
// Use the newer cc_kext for iOS ARM after 6.0.
if (isTargetWatchOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_watchos.a");
} else if (isTargetTvOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_tvos.a");
} else if (isTargetIPhoneOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_ios.a");
} else {
llvm::sys::path::append(P, "libclang_rt.cc_kext.a");
}
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build.
if (getVFS().exists(P))
CmdArgs.push_back(Args.MakeArgString(P));
}
DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
StringRef BoundArch,
Action::OffloadKind) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// FIXME: We really want to get out of the tool chain level argument
// translation business, as it makes the driver functionality much
// more opaque. For now, we follow gcc closely solely for the
// purpose of easily achieving feature parity & testability. Once we
// have something that works, we should reevaluate each translation
// and try to push it down into tool specific logic.
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches either the toolchain
// triple arch, or the arch being bound.
llvm::Triple::ArchType XarchArch =
tools::darwin::getArchTypeForMachOArchName(A->getValue(0));
if (!(XarchArch == getArch() ||
(!BoundArch.empty() &&
XarchArch ==
tools::darwin::getArchTypeForMachOArchName(BoundArch))))
continue;
Arg *OriginalArg = A;
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
unsigned Prev = Index;
std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
// If the argument parsing failed or more than one argument was
// consumed, the -Xarch_ argument's parameter tried to consume
// extra arguments. Emit an error and ignore.
//
// We also want to disallow any options which would alter the
// driver behavior; that isn't going to work in our model. We
// use isDriverOption() as an approximation, although things
// like -O4 are going to slip through.
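// For illustration (hypothetical invocation): -Xarch_x86_64 -MF would make
// -MF consume a following argument, so it is rejected here, while options
// carrying the DriverOption flag are rejected by the check below.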
if (!XarchArg || Index > Prev + 1) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
<< A->getAsString(Args);
continue;
} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
<< A->getAsString(Args);
continue;
}
XarchArg->setBaseArg(A);
A = XarchArg.release();
DAL->AddSynthesizedArg(A);
// Linker input arguments require custom handling. The problem is that we
// have already constructed the phase actions, so we cannot treat them as
// "input arguments".
if (A->getOption().hasFlag(options::LinkerInput)) {
// Convert the argument into individual Zlinker_input_args.
for (const char *Value : A->getValues()) {
DAL->AddSeparateArg(
OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
}
continue;
}
}
// Sob. This is strictly gcc compatible for the time being. Apple
// gcc translates options twice, which means that self-expanding
// options add duplicates.
switch ((options::ID)A->getOption().getID()) {
default:
DAL->append(A);
break;
case options::OPT_mkernel:
case options::OPT_fapple_kext:
DAL->append(A);
DAL->AddFlagArg(A, Opts.getOption(options::OPT_static));
break;
case options::OPT_dependency_file:
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue());
break;
case options::OPT_gfull:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_fno_eliminate_unused_debug_symbols));
break;
case options::OPT_gused:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_feliminate_unused_debug_symbols));
break;
case options::OPT_shared:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_dynamiclib));
break;
case options::OPT_fconstant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mconstant_cfstrings));
break;
case options::OPT_fno_constant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_constant_cfstrings));
break;
case options::OPT_Wnonportable_cfstrings:
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_mwarn_nonportable_cfstrings));
break;
case options::OPT_Wno_nonportable_cfstrings:
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_mno_warn_nonportable_cfstrings));
break;
case options::OPT_fpascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mpascal_strings));
break;
case options::OPT_fno_pascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_pascal_strings));
break;
}
}
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64)
if (!Args.hasArgNoClaim(options::OPT_mtune_EQ))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mtune_EQ),
"core2");
// Add the arch options based on the particular spelling of -arch, to match
// how the driver driver works.
if (!BoundArch.empty()) {
StringRef Name = BoundArch;
const Option MCpu = Opts.getOption(options::OPT_mcpu_EQ);
const Option MArch = Opts.getOption(clang::driver::options::OPT_march_EQ);
// This code must be kept in sync with LLVM's getArchTypeForDarwinArch,
// which defines the list of which architectures we accept.
if (Name == "ppc")
;
else if (Name == "ppc601")
DAL->AddJoinedArg(nullptr, MCpu, "601");
else if (Name == "ppc603")
DAL->AddJoinedArg(nullptr, MCpu, "603");
else if (Name == "ppc604")
DAL->AddJoinedArg(nullptr, MCpu, "604");
else if (Name == "ppc604e")
DAL->AddJoinedArg(nullptr, MCpu, "604e");
else if (Name == "ppc750")
DAL->AddJoinedArg(nullptr, MCpu, "750");
else if (Name == "ppc7400")
DAL->AddJoinedArg(nullptr, MCpu, "7400");
else if (Name == "ppc7450")
DAL->AddJoinedArg(nullptr, MCpu, "7450");
else if (Name == "ppc970")
DAL->AddJoinedArg(nullptr, MCpu, "970");
else if (Name == "ppc64" || Name == "ppc64le")
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
else if (Name == "i386")
;
else if (Name == "i486")
DAL->AddJoinedArg(nullptr, MArch, "i486");
else if (Name == "i586")
DAL->AddJoinedArg(nullptr, MArch, "i586");
else if (Name == "i686")
DAL->AddJoinedArg(nullptr, MArch, "i686");
else if (Name == "pentium")
DAL->AddJoinedArg(nullptr, MArch, "pentium");
else if (Name == "pentium2")
DAL->AddJoinedArg(nullptr, MArch, "pentium2");
else if (Name == "pentpro")
DAL->AddJoinedArg(nullptr, MArch, "pentiumpro");
else if (Name == "pentIIm3")
DAL->AddJoinedArg(nullptr, MArch, "pentium2");
else if (Name == "x86_64")
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
else if (Name == "x86_64h") {
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
DAL->AddJoinedArg(nullptr, MArch, "x86_64h");
} else if (Name == "arm")
DAL->AddJoinedArg(nullptr, MArch, "armv4t");
else if (Name == "armv4t")
DAL->AddJoinedArg(nullptr, MArch, "armv4t");
else if (Name == "armv5")
DAL->AddJoinedArg(nullptr, MArch, "armv5tej");
else if (Name == "xscale")
DAL->AddJoinedArg(nullptr, MArch, "xscale");
else if (Name == "armv6")
DAL->AddJoinedArg(nullptr, MArch, "armv6k");
else if (Name == "armv6m")
DAL->AddJoinedArg(nullptr, MArch, "armv6m");
else if (Name == "armv7")
DAL->AddJoinedArg(nullptr, MArch, "armv7a");
else if (Name == "armv7em")
DAL->AddJoinedArg(nullptr, MArch, "armv7em");
else if (Name == "armv7k")
DAL->AddJoinedArg(nullptr, MArch, "armv7k");
else if (Name == "armv7m")
DAL->AddJoinedArg(nullptr, MArch, "armv7m");
else if (Name == "armv7s")
DAL->AddJoinedArg(nullptr, MArch, "armv7s");
}
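// For illustration: "-arch armv7" binds -march=armv7a above, and
// "-arch x86_64h" adds both -m64 and -march=x86_64h.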
return DAL;
}
void MachO::AddLinkRuntimeLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Embedded targets are simple at the moment: they do not support sanitizers,
// and they need a different library for each member of the product
// { static, PIC } x { hard-float, soft-float }.
llvm::SmallString<32> CompilerRT = StringRef("libclang_rt.");
CompilerRT +=
(tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard)
? "hard"
: "soft";
CompilerRT += Args.hasArg(options::OPT_fPIC) ? "_pic.a" : "_static.a";
AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true);
}
bool Darwin::isAlignedAllocationUnavailable() const {
switch (TargetPlatform) {
case MacOS: // Earlier than 10.13.
return TargetVersion < VersionTuple(10U, 13U, 0U);
case IPhoneOS:
case IPhoneOSSimulator:
case TvOS:
case TvOSSimulator: // Earlier than 11.0.
return TargetVersion < VersionTuple(11U, 0U, 0U);
case WatchOS:
case WatchOSSimulator: // Earlier than 4.0.
return TargetVersion < VersionTuple(4U, 0U, 0U);
}
llvm_unreachable("Unsupported platform");
}
void Darwin::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const {
if (isAlignedAllocationUnavailable())
CC1Args.push_back("-faligned-alloc-unavailable");
}
DerivedArgList *
Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
// First get the generic Apple args, before moving onto Darwin-specific ones.
DerivedArgList *DAL =
MachO::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
const OptTable &Opts = getDriver().getOpts();
// If no architecture is bound, none of the translations here are relevant.
if (BoundArch.empty())
return DAL;
// Add an explicit version min argument for the deployment target. We do this
// after argument translation because -Xarch_ arguments may add a version min
// argument.
AddDeploymentTarget(*DAL);
// For iOS 6, undo the translation to add -static for -mkernel/-fapple-kext.
// FIXME: It would be far better to avoid inserting those -static arguments,
// but we can't check the deployment target in the translation code until
// it is set here.
if (isTargetWatchOSBased() ||
(isTargetIOSBased() && !isIPhoneOSVersionLT(6, 0))) {
for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie; ) {
Arg *A = *it;
++it;
if (A->getOption().getID() != options::OPT_mkernel &&
A->getOption().getID() != options::OPT_fapple_kext)
continue;
assert(it != ie && "unexpected argument translation");
A = *it;
assert(A->getOption().getID() == options::OPT_static &&
"missing expected -static argument");
*it = nullptr;
++it;
}
}
if (!Args.getLastArg(options::OPT_stdlib_EQ) &&
GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_stdlib_EQ),
"libc++");
// Validate the C++ standard library choice.
CXXStdlibType Type = GetCXXStdlibType(*DAL);
if (Type == ToolChain::CST_Libcxx) {
// Check whether the target provides libc++.
StringRef where;
// Complain about targeting iOS < 5.0 in any way.
if (isTargetIOSBased() && isIPhoneOSVersionLT(5, 0))
where = "iOS 5.0";
if (where != StringRef()) {
getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment) << where;
}
}
auto Arch = tools::darwin::getArchTypeForMachOArchName(BoundArch);
if ((Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)) {
if (Args.hasFlag(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer, false))
getDriver().Diag(clang::diag::warn_drv_unsupported_opt_for_target)
<< "-fomit-frame-pointer" << BoundArch;
}
return DAL;
}
bool MachO::IsUnwindTablesDefault(const ArgList &Args) const {
- return !UseSjLjExceptions(Args);
+ // Unwind tables are not emitted if -fno-exceptions is supplied (except when
+ // targeting x86_64).
+ return getArch() == llvm::Triple::x86_64 ||
+ (!UseSjLjExceptions(Args) &&
+ Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
+ true));
}
bool MachO::UseDwarfDebugFlags() const {
if (const char *S = ::getenv("RC_DEBUG_OPTIONS"))
return S[0] != '\0';
return false;
}
bool Darwin::UseSjLjExceptions(const ArgList &Args) const {
// Darwin uses SjLj exceptions on ARM.
if (getTriple().getArch() != llvm::Triple::arm &&
getTriple().getArch() != llvm::Triple::thumb)
return false;
// Only watchOS uses the new DWARF/Compact unwinding method.
llvm::Triple Triple(ComputeLLVMTriple(Args));
return !Triple.isWatchABI();
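// e.g. armv7 iOS keeps SjLj exceptions, while armv7k watchOS (a watch ABI)
// returns false here and uses DWARF/compact unwinding instead.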
}
bool Darwin::SupportsEmbeddedBitcode() const {
assert(TargetInitialized && "Target not initialized!");
if (isTargetIPhoneOS() && isIPhoneOSVersionLT(6, 0))
return false;
return true;
}
bool MachO::isPICDefault() const { return true; }
bool MachO::isPIEDefault() const { return false; }
bool MachO::isPICDefaultForced() const {
return (getArch() == llvm::Triple::x86_64 ||
getArch() == llvm::Triple::aarch64);
}
bool MachO::SupportsProfiling() const {
// Profiling instrumentation is only supported on x86.
return getArch() == llvm::Triple::x86 || getArch() == llvm::Triple::x86_64;
}
void Darwin::addMinVersionArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
VersionTuple TargetVersion = getTargetVersion();
if (isTargetWatchOS())
CmdArgs.push_back("-watchos_version_min");
else if (isTargetWatchOSSimulator())
CmdArgs.push_back("-watchos_simulator_version_min");
else if (isTargetTvOS())
CmdArgs.push_back("-tvos_version_min");
else if (isTargetTvOSSimulator())
CmdArgs.push_back("-tvos_simulator_version_min");
else if (isTargetIOSSimulator())
CmdArgs.push_back("-ios_simulator_version_min");
else if (isTargetIOSBased())
CmdArgs.push_back("-iphoneos_version_min");
else {
assert(isTargetMacOS() && "unexpected target");
CmdArgs.push_back("-macosx_version_min");
}
CmdArgs.push_back(Args.MakeArgString(TargetVersion.getAsString()));
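// For illustration: targeting macOS yields "-macosx_version_min" followed by
// the rendered target version, and the iOS simulator yields
// "-ios_simulator_version_min" with its version.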
}
void Darwin::addStartObjectFileArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Derived from startfile spec.
if (Args.hasArg(options::OPT_dynamiclib)) {
// Derived from darwin_dylib1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need dylib1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need dylib1.o.
} else if (isTargetIPhoneOS()) {
if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-ldylib1.o");
} else {
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-ldylib1.o");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-ldylib1.10.5.o");
}
} else {
if (Args.hasArg(options::OPT_bundle)) {
if (!Args.hasArg(options::OPT_static)) {
// Derived from darwin_bundle1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need bundle1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need bundle1.o.
} else if (isTargetIPhoneOS()) {
if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lbundle1.o");
} else {
if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lbundle1.o");
}
}
} else {
if (Args.hasArg(options::OPT_pg) && SupportsProfiling()) {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lgcrt0.o");
} else {
CmdArgs.push_back("-lgcrt1.o");
// darwin_crt2 spec is empty.
}
// By default on OS X 10.8 and later, we don't link with a crt1.o
// file and the linker knows to use _main as the entry point. But,
// when compiling with -pg, we need to link with the gcrt1.o file,
// so pass the -no_new_main option to tell the linker to use the
// "start" symbol as the entry point.
if (isTargetMacOS() && !isMacosxVersionLT(10, 8))
CmdArgs.push_back("-no_new_main");
} else {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lcrt0.o");
} else {
// Derived from darwin_crt1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need crt1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need crt1.o.
} else if (isTargetIPhoneOS()) {
if (getArch() == llvm::Triple::aarch64)
; // iOS does not need any crt1 files for arm64
else if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lcrt1.o");
else if (isIPhoneOSVersionLT(6, 0))
CmdArgs.push_back("-lcrt1.3.1.o");
} else {
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lcrt1.o");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lcrt1.10.5.o");
else if (isMacosxVersionLT(10, 8))
CmdArgs.push_back("-lcrt1.10.6.o");
// darwin_crt2 spec is empty.
}
}
}
}
}
if (!isTargetIPhoneOS() && Args.hasArg(options::OPT_shared_libgcc) &&
!isTargetWatchOS() &&
isMacosxVersionLT(10, 5)) {
const char *Str = Args.MakeArgString(GetFilePath("crt3.o"));
CmdArgs.push_back(Str);
}
}
bool Darwin::SupportsObjCGC() const { return isTargetMacOS(); }
void Darwin::CheckObjCARC() const {
if (isTargetIOSBased() || isTargetWatchOSBased() ||
(isTargetMacOS() && !isMacosxVersionLT(10, 6)))
return;
getDriver().Diag(diag::err_arc_unsupported_on_toolchain);
}
SanitizerMask Darwin::getSupportedSanitizers() const {
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
Res |= SanitizerKind::Address;
Res |= SanitizerKind::Leak;
Res |= SanitizerKind::Fuzzer;
if (isTargetMacOS()) {
if (!isMacosxVersionLT(10, 9))
Res |= SanitizerKind::Vptr;
Res |= SanitizerKind::SafeStack;
if (IsX86_64)
Res |= SanitizerKind::Thread;
} else if (isTargetIOSSimulator() || isTargetTvOSSimulator()) {
if (IsX86_64)
Res |= SanitizerKind::Thread;
}
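// For illustration: on x86_64 macOS 10.9 or later this yields Address, Leak,
// Fuzzer, Vptr, SafeStack, and Thread.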
return Res;
}
void Darwin::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp (revision 322855)
@@ -1,1426 +1,1463 @@
//===--- ToolChains.cpp - ToolChain Implementations -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MSVC.h"
#include "CommonArgs.h"
#include "Darwin.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <cstdio>
// Include the necessary headers to interface with the Windows registry and
// environment.
#if defined(LLVM_ON_WIN32)
#define USE_WIN32
#endif
#ifdef USE_WIN32
#define WIN32_LEAN_AND_MEAN
#define NOGDI
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
#ifdef _MSC_VER
// Don't support SetupApi on MinGW.
#define USE_MSVC_SETUP_API
// Make sure this comes before MSVCSetupApi.h
#include <comdef.h>
#include "MSVCSetupApi.h"
#include "llvm/Support/COM.h"
_COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration));
_COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2));
_COM_SMARTPTR_TYPEDEF(ISetupHelper, __uuidof(ISetupHelper));
_COM_SMARTPTR_TYPEDEF(IEnumSetupInstances, __uuidof(IEnumSetupInstances));
_COM_SMARTPTR_TYPEDEF(ISetupInstance, __uuidof(ISetupInstance));
_COM_SMARTPTR_TYPEDEF(ISetupInstance2, __uuidof(ISetupInstance2));
#endif
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
// Defined below.
// Forward declare this so there aren't too many things above the constructor.
static bool getSystemRegistryString(const char *keyPath, const char *valueName,
std::string &value, std::string *phValue);
// Check various environment variables to try and find a toolchain.
static bool findVCToolChainViaEnvironment(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
// These variables are typically set by vcvarsall.bat
// when launching a developer command prompt.
if (llvm::Optional<std::string> VCToolsInstallDir =
llvm::sys::Process::GetEnv("VCToolsInstallDir")) {
// This is only set by newer Visual Studios, and it leads straight to
// the toolchain directory.
Path = std::move(*VCToolsInstallDir);
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
}
if (llvm::Optional<std::string> VCInstallDir =
llvm::sys::Process::GetEnv("VCINSTALLDIR")) {
// If the previous variable isn't set but this one is, then we've found
// an older Visual Studio. This variable is set by newer Visual Studios too,
// so this check has to appear second.
// In older Visual Studios, the VC directory is the toolchain.
Path = std::move(*VCInstallDir);
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
// We couldn't find any VC environment variables. Let's walk through PATH and
// see if it leads us to a VC toolchain bin directory. If it does, pick the
// first one that we find.
if (llvm::Optional<std::string> PathEnv =
llvm::sys::Process::GetEnv("PATH")) {
llvm::SmallVector<llvm::StringRef, 8> PathEntries;
llvm::StringRef(*PathEnv).split(PathEntries, llvm::sys::EnvPathSeparator);
for (llvm::StringRef PathEntry : PathEntries) {
if (PathEntry.empty())
continue;
llvm::SmallString<256> ExeTestPath;
// If cl.exe doesn't exist, then this definitely isn't a VC toolchain.
ExeTestPath = PathEntry;
llvm::sys::path::append(ExeTestPath, "cl.exe");
if (!llvm::sys::fs::exists(ExeTestPath))
continue;
// cl.exe existing isn't a conclusive test for a VC toolchain; clang also
// has a cl.exe. So let's check for link.exe too.
ExeTestPath = PathEntry;
llvm::sys::path::append(ExeTestPath, "link.exe");
if (!llvm::sys::fs::exists(ExeTestPath))
continue;
// whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
llvm::StringRef TestPath = PathEntry;
bool IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
if (!IsBin) {
// Strip any architecture subdir like "amd64".
TestPath = llvm::sys::path::parent_path(TestPath);
IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
}
if (IsBin) {
llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath);
- if (llvm::sys::path::filename(ParentPath) == "VC") {
+ llvm::StringRef ParentFilename = llvm::sys::path::filename(ParentPath);
+ if (ParentFilename == "VC") {
Path = ParentPath;
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
+ if (ParentFilename == "x86ret" || ParentFilename == "x86chk"
+ || ParentFilename == "amd64ret" || ParentFilename == "amd64chk") {
+ Path = ParentPath;
+ VSLayout = MSVCToolChain::ToolsetLayout::DevDivInternal;
+ return true;
+ }
} else {
// This could be a new (>=VS2017) toolchain. If it is, we should find
// path components with these prefixes when walking backwards through
// the path.
// Note: empty strings match anything.
llvm::StringRef ExpectedPrefixes[] = {"", "Host", "bin", "",
"MSVC", "Tools", "VC"};
auto It = llvm::sys::path::rbegin(PathEntry);
auto End = llvm::sys::path::rend(PathEntry);
for (llvm::StringRef Prefix : ExpectedPrefixes) {
if (It == End)
goto NotAToolChain;
if (!It->startswith(Prefix))
goto NotAToolChain;
++It;
}
// We've found a new toolchain!
// Back up 3 times (/bin/Host/arch) to get the root path.
llvm::StringRef ToolChainPath(PathEntry);
for (int i = 0; i < 3; ++i)
ToolChainPath = llvm::sys::path::parent_path(ToolChainPath);
Path = ToolChainPath;
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
}
NotAToolChain:
continue;
}
}
return false;
}
// Query the Setup Config server for installs, then pick the newest version
// and find its default VC toolchain.
// This is the preferred way to discover new Visual Studios, as they're no
// longer listed in the registry.
static bool findVCToolChainViaSetupConfig(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
#if !defined(USE_MSVC_SETUP_API)
return false;
#else
// FIXME: This really should be done once in the top-level program's main
// function, as it may have already been initialized with a different
// threading model otherwise.
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::SingleThreaded);
HRESULT HR;
// _com_ptr_t will throw a _com_error if a COM call fails.
// The LLVM coding standards forbid exception handling, so we'll have to
// stop them from being thrown in the first place.
// The destructor will put the regular error handler back when we leave
// this scope.
struct SuppressCOMErrorsRAII {
static void __stdcall handler(HRESULT hr, IErrorInfo *perrinfo) {}
SuppressCOMErrorsRAII() { _set_com_error_handler(handler); }
~SuppressCOMErrorsRAII() { _set_com_error_handler(_com_raise_error); }
} COMErrorSuppressor;
ISetupConfigurationPtr Query;
HR = Query.CreateInstance(__uuidof(SetupConfiguration));
if (FAILED(HR))
return false;
IEnumSetupInstancesPtr EnumInstances;
HR = ISetupConfiguration2Ptr(Query)->EnumAllInstances(&EnumInstances);
if (FAILED(HR))
return false;
ISetupInstancePtr Instance;
HR = EnumInstances->Next(1, &Instance, nullptr);
if (HR != S_OK)
return false;
ISetupInstancePtr NewestInstance;
Optional<uint64_t> NewestVersionNum;
do {
bstr_t VersionString;
uint64_t VersionNum;
HR = Instance->GetInstallationVersion(VersionString.GetAddress());
if (FAILED(HR))
continue;
HR = ISetupHelperPtr(Query)->ParseVersion(VersionString, &VersionNum);
if (FAILED(HR))
continue;
if (!NewestVersionNum || (VersionNum > NewestVersionNum)) {
NewestInstance = Instance;
NewestVersionNum = VersionNum;
}
} while ((HR = EnumInstances->Next(1, &Instance, nullptr)) == S_OK);
if (!NewestInstance)
return false;
bstr_t VCPathWide;
HR = NewestInstance->ResolvePath(L"VC", VCPathWide.GetAddress());
if (FAILED(HR))
return false;
std::string VCRootPath;
llvm::convertWideToUTF8(std::wstring(VCPathWide), VCRootPath);
llvm::SmallString<256> ToolsVersionFilePath(VCRootPath);
llvm::sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
"Microsoft.VCToolsVersion.default.txt");
auto ToolsVersionFile = llvm::MemoryBuffer::getFile(ToolsVersionFilePath);
if (!ToolsVersionFile)
return false;
llvm::SmallString<256> ToolchainPath(VCRootPath);
llvm::sys::path::append(ToolchainPath, "Tools", "MSVC",
ToolsVersionFile->get()->getBuffer().rtrim());
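// e.g. if Microsoft.VCToolsVersion.default.txt contains "14.11.25503", the
// toolchain path becomes <VCRoot>/Tools/MSVC/14.11.25503.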
if (!llvm::sys::fs::is_directory(ToolchainPath))
return false;
Path = ToolchainPath.str();
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
#endif
}
// Look in the registry for Visual Studio installs, and use that to get
// a toolchain path. VS2017 and newer don't get added to the registry.
// So if we find something here, we know that it's an older version.
static bool findVCToolChainViaRegistry(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
std::string VSInstallPath;
if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
"InstallDir", VSInstallPath, nullptr) ||
getSystemRegistryString(R"(SOFTWARE\Microsoft\VCExpress\$VERSION)",
"InstallDir", VSInstallPath, nullptr)) {
if (!VSInstallPath.empty()) {
llvm::SmallString<256> VCPath(llvm::StringRef(
VSInstallPath.c_str(), VSInstallPath.find(R"(\Common7\IDE)")));
llvm::sys::path::append(VCPath, "VC");
Path = VCPath.str();
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
}
return false;
}
// Try to find Exe from a Visual Studio distribution. This first tries to find
// an installed copy of Visual Studio and, failing that, looks in the PATH,
// making sure that whatever executable is found is not a same-named exe from
// clang itself, to prevent clang from falling back to itself.
static std::string FindVisualStudioExecutable(const ToolChain &TC,
const char *Exe) {
const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
SmallString<128> FilePath(MSVC.getSubDirectoryPath(
toolchains::MSVCToolChain::SubDirectoryType::Bin));
llvm::sys::path::append(FilePath, Exe);
return llvm::sys::fs::can_execute(FilePath) ? FilePath.str() : Exe;
}
void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
auto &TC = static_cast<const toolchains::MSVCToolChain &>(getToolChain());
assert((Output.isFilename() || Output.isNothing()) && "invalid output");
if (Output.isFilename())
CmdArgs.push_back(
Args.MakeArgString(std::string("-out:") + Output.getFilename()));
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) &&
!C.getDriver().IsCLMode())
CmdArgs.push_back("-defaultlib:libcmt");
if (!llvm::sys::Process::GetEnv("LIB")) {
// If the VC environment hasn't been configured (perhaps because the user
// did not run vcvarsall), try to build a consistent link environment. If
// the environment variable is set, however, assume the user knows what
// they're doing.
CmdArgs.push_back(Args.MakeArgString(
Twine("-libpath:") +
TC.getSubDirectoryPath(
toolchains::MSVCToolChain::SubDirectoryType::Lib)));
if (TC.useUniversalCRT()) {
std::string UniversalCRTLibPath;
if (TC.getUniversalCRTLibraryPath(UniversalCRTLibPath))
CmdArgs.push_back(
Args.MakeArgString(Twine("-libpath:") + UniversalCRTLibPath));
}
std::string WindowsSdkLibPath;
if (TC.getWindowsSDKLibraryPath(WindowsSdkLibPath))
CmdArgs.push_back(
Args.MakeArgString(std::string("-libpath:") + WindowsSdkLibPath));
}
if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L))
for (const auto &LibPath : Args.getAllArgValues(options::OPT_L))
CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));
CmdArgs.push_back("-nologo");
if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7,
options::OPT__SLASH_Zd))
CmdArgs.push_back("-debug");
bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
options::OPT_shared);
if (DLL) {
CmdArgs.push_back(Args.MakeArgString("-dll"));
SmallString<128> ImplibName(Output.getFilename());
llvm::sys::path::replace_extension(ImplibName, "lib");
CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
}
if (TC.getSanitizerArgs().needsAsanRt()) {
CmdArgs.push_back(Args.MakeArgString("-debug"));
CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
if (TC.getSanitizerArgs().needsSharedAsanRt() ||
Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
// Make sure the dynamic runtime thunk is not optimized out at link time
// to ensure proper SEH handling.
CmdArgs.push_back(Args.MakeArgString(
TC.getArch() == llvm::Triple::x86
? "-include:___asan_seh_interceptor"
: "-include:__asan_seh_interceptor"));
// Make sure the linker considers all object files from the dynamic
// runtime thunk.
CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
TC.getCompilerRT(Args, "asan_dynamic_runtime_thunk")));
} else if (DLL) {
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
} else {
for (const auto &Lib : {"asan", "asan_cxx"}) {
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
// Make sure the linker considers all object files from the static lib.
// This is necessary because instrumented DLLs need access to all the
// interfaces exported by the static lib in the main executable.
CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
TC.getCompilerRT(Args, Lib)));
}
}
}
Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link);
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false)) {
CmdArgs.push_back("-nodefaultlib:vcomp.lib");
CmdArgs.push_back("-nodefaultlib:vcompd.lib");
CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
TC.getDriver().Dir + "/../lib"));
switch (TC.getDriver().getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
CmdArgs.push_back("-defaultlib:libomp.lib");
break;
case Driver::OMPRT_IOMP5:
CmdArgs.push_back("-defaultlib:libiomp5md.lib");
break;
case Driver::OMPRT_GOMP:
break;
case Driver::OMPRT_Unknown:
// Already diagnosed.
break;
}
}
// Add the compiler-rt library if it was explicitly
// specified as an argument to the --rtlib option.
if (!Args.hasArg(options::OPT_nostdlib)) {
AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
}
// Add filenames, libraries, and other linker inputs.
for (const auto &Input : Inputs) {
if (Input.isFilename()) {
CmdArgs.push_back(Input.getFilename());
continue;
}
const Arg &A = Input.getInputArg();
// Render -l options differently for the MSVC linker.
if (A.getOption().matches(options::OPT_l)) {
StringRef Lib = A.getValue();
const char *LinkLibArg;
if (Lib.endswith(".lib"))
LinkLibArg = Args.MakeArgString(Lib);
else
LinkLibArg = Args.MakeArgString(Lib + ".lib");
CmdArgs.push_back(LinkLibArg);
continue;
}
// Otherwise, this is some other kind of linker input option like -Wl, -z,
// or -L. Render it, even if MSVC doesn't understand it.
A.renderAsInput(Args, CmdArgs);
}
TC.addProfileRTLibs(Args, CmdArgs);
std::vector<const char *> Environment;
// We need to special-case some linker paths. In the case of lld, we need to
// translate 'lld' into 'lld-link', and in the case of the regular msvc
// linker, we need to use a special search algorithm.
llvm::SmallString<128> linkPath;
StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
if (Linker.equals_lower("lld"))
Linker = "lld-link";
if (Linker.equals_lower("link")) {
// If we're using the MSVC linker, it's not sufficient to just use link
// from the program PATH, because other environments like GnuWin32 install
// their own link.exe which may come first.
linkPath = FindVisualStudioExecutable(TC, "link.exe");
#ifdef USE_WIN32
// When cross-compiling with VS2017 or newer, link.exe expects to have
// its containing bin directory at the top of PATH, followed by the
// native target bin directory.
// e.g. when compiling for x86 on an x64 host, PATH should start with:
// /bin/HostX64/x86;/bin/HostX64/x64
+ // This doesn't attempt to handle ToolsetLayout::DevDivInternal.
if (TC.getIsVS2017OrNewer() &&
llvm::Triple(llvm::sys::getProcessTriple()).getArch() != TC.getArch()) {
auto HostArch = llvm::Triple(llvm::sys::getProcessTriple()).getArch();
auto EnvBlockWide =
std::unique_ptr<wchar_t[], decltype(&FreeEnvironmentStringsW)>(
GetEnvironmentStringsW(), FreeEnvironmentStringsW);
if (!EnvBlockWide)
goto SkipSettingEnvironment;
size_t EnvCount = 0;
size_t EnvBlockLen = 0;
while (EnvBlockWide[EnvBlockLen] != L'\0') {
++EnvCount;
EnvBlockLen += std::wcslen(&EnvBlockWide[EnvBlockLen]) +
1 /*string null-terminator*/;
}
++EnvBlockLen; // add the block null-terminator
std::string EnvBlock;
if (!llvm::convertUTF16ToUTF8String(
llvm::ArrayRef<char>(reinterpret_cast<char *>(EnvBlockWide.get()),
EnvBlockLen * sizeof(EnvBlockWide[0])),
EnvBlock))
goto SkipSettingEnvironment;
Environment.reserve(EnvCount);
// Now loop over each string in the block and copy them into the
// environment vector, adjusting the PATH variable as needed when we
// find it.
for (const char *Cursor = EnvBlock.data(); *Cursor != '\0';) {
llvm::StringRef EnvVar(Cursor);
if (EnvVar.startswith_lower("path=")) {
using SubDirectoryType = toolchains::MSVCToolChain::SubDirectoryType;
constexpr size_t PrefixLen = 5; // strlen("path=")
Environment.push_back(Args.MakeArgString(
EnvVar.substr(0, PrefixLen) +
TC.getSubDirectoryPath(SubDirectoryType::Bin) +
llvm::Twine(llvm::sys::EnvPathSeparator) +
TC.getSubDirectoryPath(SubDirectoryType::Bin, HostArch) +
(EnvVar.size() > PrefixLen
? llvm::Twine(llvm::sys::EnvPathSeparator) +
EnvVar.substr(PrefixLen)
: "")));
} else {
Environment.push_back(Args.MakeArgString(EnvVar));
}
Cursor += EnvVar.size() + 1 /*null-terminator*/;
}
}
SkipSettingEnvironment:;
#endif
} else {
linkPath = TC.GetProgramPath(Linker.str().c_str());
}
auto LinkCmd = llvm::make_unique<Command>(
JA, *this, Args.MakeArgString(linkPath), CmdArgs, Inputs);
if (!Environment.empty())
LinkCmd->setEnvironment(Environment);
C.addCommand(std::move(LinkCmd));
}
void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
}
std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
Compilation &C, const JobAction &JA, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("/nologo");
CmdArgs.push_back("/c"); // Compile only.
CmdArgs.push_back("/W0"); // No warnings.
// The goal is to be able to invoke this tool correctly based on
// any flag accepted by clang-cl.
// These are spelled the same way in clang and cl.exe.
Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I});
// Optimization level.
if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin))
CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi"
: "/Oi-");
if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
if (A->getOption().getID() == options::OPT_O0) {
CmdArgs.push_back("/Od");
} else {
CmdArgs.push_back("/Og");
StringRef OptLevel = A->getValue();
if (OptLevel == "s" || OptLevel == "z")
CmdArgs.push_back("/Os");
else
CmdArgs.push_back("/Ot");
CmdArgs.push_back("/Ob2");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer))
CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer
? "/Oy"
: "/Oy-");
if (!Args.hasArg(options::OPT_fwritable_strings))
CmdArgs.push_back("/GF");
// Flags for which clang-cl has an alias.
// FIXME: How can we ensure this stays in sync with relevant clang-cl options?
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*default=*/false))
CmdArgs.push_back("/GR-");
if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
/*default=*/false))
CmdArgs.push_back("/GS-");
if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
options::OPT_fno_function_sections))
CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
? "/Gy"
: "/Gy-");
if (Arg *A = Args.getLastArg(options::OPT_fdata_sections,
options::OPT_fno_data_sections))
CmdArgs.push_back(
A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-");
if (Args.hasArg(options::OPT_fsyntax_only))
CmdArgs.push_back("/Zs");
if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only,
options::OPT__SLASH_Z7))
CmdArgs.push_back("/Z7");
std::vector<std::string> Includes =
Args.getAllArgValues(options::OPT_include);
for (const auto &Include : Includes)
CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include));
// Flags that can simply be passed through.
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl);
// The order of these flags is relevant, so pick the last one.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd,
options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
A->render(Args, CmdArgs);
// Use MSVC's default threadsafe statics behaviour unless there was a flag.
if (Arg *A = Args.getLastArg(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics)) {
CmdArgs.push_back(A->getOption().getID() == options::OPT_fthreadsafe_statics
? "/Zc:threadSafeInit"
: "/Zc:threadSafeInit-");
}
// Pass through all unknown arguments so that the fallback command can see
// them too.
Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN);
// Input filename.
assert(Inputs.size() == 1);
const InputInfo &II = Inputs[0];
assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX);
CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp");
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
// Output filename.
assert(Output.getType() == types::TY_Object);
const char *Fo =
Args.MakeArgString(std::string("/Fo") + Output.getFilename());
CmdArgs.push_back(Fo);
std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe");
return llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
CmdArgs, Inputs);
}
MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
// Check the environment first, since that's probably the user telling us
// what they want to use.
// Failing that, just try to find the newest Visual Studio version we can
// and use its default VC toolchain.
- findVCToolChainViaEnvironment(VCToolChainPath, IsVS2017OrNewer) ||
- findVCToolChainViaSetupConfig(VCToolChainPath, IsVS2017OrNewer) ||
- findVCToolChainViaRegistry(VCToolChainPath, IsVS2017OrNewer);
+ findVCToolChainViaEnvironment(VCToolChainPath, VSLayout) ||
+ findVCToolChainViaSetupConfig(VCToolChainPath, VSLayout) ||
+ findVCToolChainViaRegistry(VCToolChainPath, VSLayout);
}
Tool *MSVCToolChain::buildLinker() const {
if (VCToolChainPath.empty())
getDriver().Diag(clang::diag::warn_drv_msvc_not_found);
return new tools::visualstudio::Linker(*this);
}
Tool *MSVCToolChain::buildAssembler() const {
if (getTriple().isOSBinFormatMachO())
return new tools::darwin::Assembler(*this);
getDriver().Diag(clang::diag::err_no_external_assembler);
return nullptr;
}
bool MSVCToolChain::IsIntegratedAssemblerDefault() const {
return true;
}
bool MSVCToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
// Emit unwind tables by default on Win64. All non-x86_32 Windows platforms
// such as ARM and PPC actually require unwind tables, but LLVM doesn't know
// how to generate them yet.
// Don't emit unwind tables by default for MachO targets.
if (getTriple().isOSBinFormatMachO())
return false;
return getArch() == llvm::Triple::x86_64;
}
bool MSVCToolChain::isPICDefault() const {
return getArch() == llvm::Triple::x86_64;
}
bool MSVCToolChain::isPIEDefault() const {
return false;
}
bool MSVCToolChain::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64;
}
void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}
// Windows SDKs and VC Toolchains group their contents into subdirectories based
// on the target architecture. This function converts an llvm::Triple::ArchType
// to the corresponding subdirectory name.
static const char *llvmArchToWindowsSDKArch(llvm::Triple::ArchType Arch) {
using ArchType = llvm::Triple::ArchType;
switch (Arch) {
case ArchType::x86:
return "x86";
case ArchType::x86_64:
return "x64";
case ArchType::arm:
return "arm";
default:
return "";
}
}
// Similar to the above function, but for Visual Studios before VS2017.
static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
using ArchType = llvm::Triple::ArchType;
switch (Arch) {
case ArchType::x86:
// x86 is default in legacy VC toolchains.
// e.g. x86 libs are directly in /lib as opposed to /lib/x86.
return "";
case ArchType::x86_64:
return "amd64";
case ArchType::arm:
return "arm";
default:
return "";
}
}
+// Similar to the above function, but for DevDiv internal builds.
+static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) {
+ using ArchType = llvm::Triple::ArchType;
+ switch (Arch) {
+ case ArchType::x86:
+ return "i386";
+ case ArchType::x86_64:
+ return "amd64";
+ case ArchType::arm:
+ return "arm";
+ default:
+ return "";
+ }
+}
+
// Get the path to a specific subdirectory in the current toolchain for
// a given target architecture.
// VS2017 changed the VC toolchain layout, so this should be used instead
// of hardcoding paths.
std::string
MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type,
llvm::Triple::ArchType TargetArch) const {
+ const char *SubdirName;
+ const char *IncludeName;
+ switch (VSLayout) {
+ case ToolsetLayout::OlderVS:
+ SubdirName = llvmArchToLegacyVCArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::VS2017OrNewer:
+ SubdirName = llvmArchToWindowsSDKArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::DevDivInternal:
+ SubdirName = llvmArchToDevDivInternalArch(TargetArch);
+ IncludeName = "inc";
+ break;
+ }
+
llvm::SmallString<256> Path(VCToolChainPath);
switch (Type) {
case SubDirectoryType::Bin:
- if (IsVS2017OrNewer) {
- bool HostIsX64 =
+ if (VSLayout == ToolsetLayout::VS2017OrNewer) {
+ const bool HostIsX64 =
llvm::Triple(llvm::sys::getProcessTriple()).isArch64Bit();
- llvm::sys::path::append(Path, "bin", (HostIsX64 ? "HostX64" : "HostX86"),
- llvmArchToWindowsSDKArch(TargetArch));
-
- } else {
- llvm::sys::path::append(Path, "bin", llvmArchToLegacyVCArch(TargetArch));
+ const char *const HostName = HostIsX64 ? "HostX64" : "HostX86";
+ llvm::sys::path::append(Path, "bin", HostName, SubdirName);
+ } else { // OlderVS or DevDivInternal
+ llvm::sys::path::append(Path, "bin", SubdirName);
}
break;
case SubDirectoryType::Include:
- llvm::sys::path::append(Path, "include");
+ llvm::sys::path::append(Path, IncludeName);
break;
case SubDirectoryType::Lib:
- llvm::sys::path::append(
- Path, "lib", IsVS2017OrNewer ? llvmArchToWindowsSDKArch(TargetArch)
- : llvmArchToLegacyVCArch(TargetArch));
+ llvm::sys::path::append(Path, "lib", SubdirName);
break;
}
return Path.str();
}
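// A quick sketch of the paths this yields, assuming a hypothetical
// VCToolChainPath of "VC", an x64 host, and an x86 target (illustrative
// values, not taken from a real install):
//   VS2017OrNewer:  Bin -> VC/bin/HostX64/x86, Include -> VC/include,
//                   Lib -> VC/lib/x86
//   OlderVS:        Bin -> VC/bin, Include -> VC/include, Lib -> VC/lib
//                   (the empty legacy x86 subdir name appends nothing)
//   DevDivInternal: Bin -> VC/bin/i386, Include -> VC/inc, Lib -> VC/lib/i386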
#ifdef USE_WIN32
static bool readFullStringValue(HKEY hkey, const char *valueName,
std::string &value) {
std::wstring WideValueName;
if (!llvm::ConvertUTF8toWide(valueName, WideValueName))
return false;
DWORD result = 0;
DWORD valueSize = 0;
DWORD type = 0;
// First just query for the required size.
result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, &type, NULL,
&valueSize);
if (result != ERROR_SUCCESS || type != REG_SZ || !valueSize)
return false;
std::vector<BYTE> buffer(valueSize);
result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, NULL, &buffer[0],
&valueSize);
if (result == ERROR_SUCCESS) {
std::wstring WideValue(reinterpret_cast<const wchar_t *>(buffer.data()),
valueSize / sizeof(wchar_t));
if (valueSize && WideValue.back() == L'\0') {
WideValue.pop_back();
}
// The destination buffer must be empty as an invariant of the conversion
// function; however, this function is sometimes called in a loop that passes
// in the same buffer. Simply clear it out so we can overwrite it.
value.clear();
return llvm::convertWideToUTF8(WideValue, value);
}
return false;
}
#endif
/// \brief Read registry string.
/// This also supports a means to look for high-versioned keys by use
/// of a $VERSION placeholder in the key path.
/// $VERSION in the key path is a placeholder for the version number,
/// causing the highest value path to be searched for and used.
/// I.e. "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION".
/// There can be additional characters in the component. Only the numeric
/// characters are compared. This function only searches HKLM.
static bool getSystemRegistryString(const char *keyPath, const char *valueName,
std::string &value, std::string *phValue) {
#ifndef USE_WIN32
return false;
#else
HKEY hRootKey = HKEY_LOCAL_MACHINE;
HKEY hKey = NULL;
long lResult;
bool returnValue = false;
const char *placeHolder = strstr(keyPath, "$VERSION");
std::string bestName;
// If we have a $VERSION placeholder, do the highest-version search.
if (placeHolder) {
const char *keyEnd = placeHolder - 1;
const char *nextKey = placeHolder;
// Find end of previous key.
while ((keyEnd > keyPath) && (*keyEnd != '\\'))
keyEnd--;
// Find end of key containing $VERSION.
while (*nextKey && (*nextKey != '\\'))
nextKey++;
size_t partialKeyLength = keyEnd - keyPath;
char partialKey[256];
if (partialKeyLength >= sizeof(partialKey))
partialKeyLength = sizeof(partialKey) - 1;
strncpy(partialKey, keyPath, partialKeyLength);
partialKey[partialKeyLength] = '\0';
HKEY hTopKey = NULL;
lResult = RegOpenKeyExA(hRootKey, partialKey, 0, KEY_READ | KEY_WOW64_32KEY,
&hTopKey);
if (lResult == ERROR_SUCCESS) {
char keyName[256];
double bestValue = 0.0;
DWORD index, size = sizeof(keyName) - 1;
for (index = 0; RegEnumKeyExA(hTopKey, index, keyName, &size, NULL, NULL,
NULL, NULL) == ERROR_SUCCESS;
index++) {
const char *sp = keyName;
while (*sp && !isDigit(*sp))
sp++;
if (!*sp)
continue;
const char *ep = sp + 1;
while (*ep && (isDigit(*ep) || (*ep == '.')))
ep++;
char numBuf[32];
strncpy(numBuf, sp, sizeof(numBuf) - 1);
numBuf[sizeof(numBuf) - 1] = '\0';
double dvalue = strtod(numBuf, NULL);
if (dvalue > bestValue) {
// Test that InstallDir is indeed there before keeping this index.
// Open the chosen key path remainder.
bestName = keyName;
// Append rest of key.
bestName.append(nextKey);
lResult = RegOpenKeyExA(hTopKey, bestName.c_str(), 0,
KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
if (readFullStringValue(hKey, valueName, value)) {
bestValue = dvalue;
if (phValue)
*phValue = bestName;
returnValue = true;
}
RegCloseKey(hKey);
}
}
size = sizeof(keyName) - 1;
}
RegCloseKey(hTopKey);
}
} else {
lResult =
RegOpenKeyExA(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
if (readFullStringValue(hKey, valueName, value))
returnValue = true;
if (phValue)
phValue->clear();
RegCloseKey(hKey);
}
}
return returnValue;
#endif // USE_WIN32
}
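// A minimal usage sketch: the key path below is the one cited in the doc
// comment above; the value name "InstallDir" is an assumption for
// illustration only.
//   std::string VSInstallDir, VersionKey;
//   if (getSystemRegistryString("SOFTWARE\\Microsoft\\VisualStudio\\$VERSION",
//                               "InstallDir", VSInstallDir, &VersionKey)) {
//     // VersionKey now names the highest version subkey found, e.g. "14.0".
//   }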
// Find the most recent version of Universal CRT or Windows 10 SDK.
// vcvarsqueryregistry.bat from Visual Studio 2015 sorts entries in the include
// directory by name and uses the last one in the list.
// So we compare entry names lexicographically to find the greatest one.
static bool getWindows10SDKVersionFromPath(const std::string &SDKPath,
std::string &SDKVersion) {
SDKVersion.clear();
std::error_code EC;
llvm::SmallString<128> IncludePath(SDKPath);
llvm::sys::path::append(IncludePath, "Include");
for (llvm::sys::fs::directory_iterator DirIt(IncludePath, EC), DirEnd;
DirIt != DirEnd && !EC; DirIt.increment(EC)) {
if (!llvm::sys::fs::is_directory(DirIt->path()))
continue;
StringRef CandidateName = llvm::sys::path::filename(DirIt->path());
// If WDK is installed, there could be subfolders like "wdf" in the
// "Include" directory.
// Allow only directories whose names start with "10.".
if (!CandidateName.startswith("10."))
continue;
if (CandidateName > SDKVersion)
SDKVersion = CandidateName;
}
return !SDKVersion.empty();
}
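// Worked example with a hypothetical Include directory holding
// {10.0.10240.0, 10.0.14393.0, wdf}: "wdf" is skipped by the "10." filter
// and "10.0.14393.0" wins the StringRef comparison. Note the comparison is
// lexicographic, mirroring vcvarsqueryregistry.bat, not numeric.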
/// \brief Get Windows SDK installation directory.
static bool getWindowsSDKDir(std::string &Path, int &Major,
std::string &WindowsSDKIncludeVersion,
std::string &WindowsSDKLibVersion) {
std::string RegistrySDKVersion;
// Try the Windows registry.
if (!getSystemRegistryString(
"SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\$VERSION",
"InstallationFolder", Path, &RegistrySDKVersion))
return false;
if (Path.empty() || RegistrySDKVersion.empty())
return false;
WindowsSDKIncludeVersion.clear();
WindowsSDKLibVersion.clear();
Major = 0;
std::sscanf(RegistrySDKVersion.c_str(), "v%d.", &Major);
if (Major <= 7)
return true;
if (Major == 8) {
// Windows SDK 8.x installs libraries in folders whose names depend on the
// version of the OS you're targeting. By default choose the newest, which
// usually corresponds to the version of the OS you've installed the SDK on.
const char *Tests[] = {"winv6.3", "win8", "win7"};
for (const char *Test : Tests) {
llvm::SmallString<128> TestPath(Path);
llvm::sys::path::append(TestPath, "Lib", Test);
if (llvm::sys::fs::exists(TestPath.c_str())) {
WindowsSDKLibVersion = Test;
break;
}
}
return !WindowsSDKLibVersion.empty();
}
if (Major == 10) {
if (!getWindows10SDKVersionFromPath(Path, WindowsSDKIncludeVersion))
return false;
WindowsSDKLibVersion = WindowsSDKIncludeVersion;
return true;
}
// Unsupported SDK version
return false;
}
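// For instance (illustrative values): a registry version string of "v8.1"
// parses to Major == 8 and takes the SDK 8.x lib-folder probing above,
// while "v10.0" parses to Major == 10 and defers to
// getWindows10SDKVersionFromPath for the full version string.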
// Gets the library path required to link against the Windows SDK.
bool MSVCToolChain::getWindowsSDKLibraryPath(std::string &path) const {
std::string sdkPath;
int sdkMajor = 0;
std::string windowsSDKIncludeVersion;
std::string windowsSDKLibVersion;
path.clear();
if (!getWindowsSDKDir(sdkPath, sdkMajor, windowsSDKIncludeVersion,
windowsSDKLibVersion))
return false;
llvm::SmallString<128> libPath(sdkPath);
llvm::sys::path::append(libPath, "Lib");
if (sdkMajor >= 8) {
llvm::sys::path::append(libPath, windowsSDKLibVersion, "um",
llvmArchToWindowsSDKArch(getArch()));
} else {
switch (getArch()) {
// In Windows SDK 7.x, x86 libraries are directly in the Lib folder.
case llvm::Triple::x86:
break;
case llvm::Triple::x86_64:
llvm::sys::path::append(libPath, "x64");
break;
case llvm::Triple::arm:
// It is not necessary to link against Windows SDK 7.x when targeting ARM.
return false;
default:
return false;
}
}
path = libPath.str();
return true;
}
// Check if the Include path of a specified version of Visual Studio contains
// specific header files. If not, they are probably shipped with Universal CRT.
bool MSVCToolChain::useUniversalCRT() const {
llvm::SmallString<128> TestPath(
getSubDirectoryPath(SubDirectoryType::Include));
llvm::sys::path::append(TestPath, "stdlib.h");
return !llvm::sys::fs::exists(TestPath);
}
static bool getUniversalCRTSdkDir(std::string &Path, std::string &UCRTVersion) {
// vcvarsqueryregistry.bat for Visual Studio 2015 queries the registry
// for the specific key "KitsRoot10". So do we.
if (!getSystemRegistryString(
"SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots", "KitsRoot10",
Path, nullptr))
return false;
return getWindows10SDKVersionFromPath(Path, UCRTVersion);
}
bool MSVCToolChain::getUniversalCRTLibraryPath(std::string &Path) const {
std::string UniversalCRTSdkPath;
std::string UCRTVersion;
Path.clear();
if (!getUniversalCRTSdkDir(UniversalCRTSdkPath, UCRTVersion))
return false;
StringRef ArchName = llvmArchToWindowsSDKArch(getArch());
if (ArchName.empty())
return false;
llvm::SmallString<128> LibPath(UniversalCRTSdkPath);
llvm::sys::path::append(LibPath, "Lib", UCRTVersion, "ucrt", ArchName);
Path = LibPath.str();
return true;
}
static VersionTuple getMSVCVersionFromTriple(const llvm::Triple &Triple) {
unsigned Major, Minor, Micro;
Triple.getEnvironmentVersion(Major, Minor, Micro);
if (Major || Minor || Micro)
return VersionTuple(Major, Minor, Micro);
return VersionTuple();
}
static VersionTuple getMSVCVersionFromExe(const std::string &BinDir) {
VersionTuple Version;
#ifdef USE_WIN32
SmallString<128> ClExe(BinDir);
llvm::sys::path::append(ClExe, "cl.exe");
std::wstring ClExeWide;
if (!llvm::ConvertUTF8toWide(ClExe.c_str(), ClExeWide))
return Version;
const DWORD VersionSize = ::GetFileVersionInfoSizeW(ClExeWide.c_str(),
nullptr);
if (VersionSize == 0)
return Version;
SmallVector<uint8_t, 4 * 1024> VersionBlock(VersionSize);
if (!::GetFileVersionInfoW(ClExeWide.c_str(), 0, VersionSize,
VersionBlock.data()))
return Version;
VS_FIXEDFILEINFO *FileInfo = nullptr;
UINT FileInfoSize = 0;
if (!::VerQueryValueW(VersionBlock.data(), L"\\",
reinterpret_cast<LPVOID *>(&FileInfo), &FileInfoSize) ||
FileInfoSize < sizeof(*FileInfo))
return Version;
const unsigned Major = (FileInfo->dwFileVersionMS >> 16) & 0xFFFF;
const unsigned Minor = (FileInfo->dwFileVersionMS ) & 0xFFFF;
const unsigned Micro = (FileInfo->dwFileVersionLS >> 16) & 0xFFFF;
Version = VersionTuple(Major, Minor, Micro);
#endif
return Version;
}
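// The two DWORDs pack the four 16-bit version fields. For a hypothetical
// cl.exe reporting 19.11.25547:
//   dwFileVersionMS == 0x0013000B -> Major 19 (0x13), Minor 11 (0xB)
//   dwFileVersionLS == 0x63CB0000 -> Micro 25547 (0x63CB)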
void MSVCToolChain::AddSystemIncludeWithSubfolder(
const ArgList &DriverArgs, ArgStringList &CC1Args,
const std::string &folder, const Twine &subfolder1, const Twine &subfolder2,
const Twine &subfolder3) const {
llvm::SmallString<128> path(folder);
llvm::sys::path::append(path, subfolder1, subfolder2, subfolder3);
addSystemInclude(DriverArgs, CC1Args, path);
}
void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, getDriver().ResourceDir,
"include");
}
// Add %INCLUDE%-like directories from the -imsvc flag.
for (const auto &Path : DriverArgs.getAllArgValues(options::OPT__SLASH_imsvc))
addSystemInclude(DriverArgs, CC1Args, Path);
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Honor %INCLUDE%. It should contain the essential search paths set up by
// vcvarsall.bat.
if (llvm::Optional<std::string> cl_include_dir =
llvm::sys::Process::GetEnv("INCLUDE")) {
SmallVector<StringRef, 8> Dirs;
StringRef(*cl_include_dir)
.split(Dirs, ";", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef Dir : Dirs)
addSystemInclude(DriverArgs, CC1Args, Dir);
if (!Dirs.empty())
return;
}
// When built with access to the proper Windows APIs, try to actually find
// the correct include paths first.
if (!VCToolChainPath.empty()) {
addSystemInclude(DriverArgs, CC1Args,
getSubDirectoryPath(SubDirectoryType::Include));
if (useUniversalCRT()) {
std::string UniversalCRTSdkPath;
std::string UCRTVersion;
if (getUniversalCRTSdkDir(UniversalCRTSdkPath, UCRTVersion)) {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, UniversalCRTSdkPath,
"Include", UCRTVersion, "ucrt");
}
}
std::string WindowsSDKDir;
int major;
std::string windowsSDKIncludeVersion;
std::string windowsSDKLibVersion;
if (getWindowsSDKDir(WindowsSDKDir, major, windowsSDKIncludeVersion,
windowsSDKLibVersion)) {
if (major >= 8) {
// Note: windowsSDKIncludeVersion is empty for SDKs prior to v10;
// llvm::sys::path::append handles the empty component gracefully.
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"shared");
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"um");
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"winrt");
} else {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include");
}
}
return;
}
#if defined(LLVM_ON_WIN32)
// As a fallback, select default install paths.
// FIXME: Don't guess drives and paths like this on Windows.
const StringRef Paths[] = {
"C:/Program Files/Microsoft Visual Studio 10.0/VC/include",
"C:/Program Files/Microsoft Visual Studio 9.0/VC/include",
"C:/Program Files/Microsoft Visual Studio 9.0/VC/PlatformSDK/Include",
"C:/Program Files/Microsoft Visual Studio 8/VC/include",
"C:/Program Files/Microsoft Visual Studio 8/VC/PlatformSDK/Include"
};
addSystemIncludes(DriverArgs, CC1Args, Paths);
#endif
}
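// For example (illustrative invocation): "clang-cl /imsvc C:\ext foo.c"
// with %INCLUDE% unset and a VS2017+ install detected would search, in
// order: <resource-dir>/include, C:\ext, the VC include directory, the
// UCRT headers, then the SDK shared/um/winrt headers. A populated
// %INCLUDE% short-circuits everything after the /imsvc paths.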
void MSVCToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// FIXME: There should probably be logic here to find libc++ on Windows.
}
VersionTuple MSVCToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
bool IsWindowsMSVC = getTriple().isWindowsMSVCEnvironment();
VersionTuple MSVT = ToolChain::computeMSVCVersion(D, Args);
if (MSVT.empty())
MSVT = getMSVCVersionFromTriple(getTriple());
if (MSVT.empty() && IsWindowsMSVC)
MSVT = getMSVCVersionFromExe(getSubDirectoryPath(SubDirectoryType::Bin));
if (MSVT.empty() &&
Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC)) {
// -fms-compatibility-version=18.00 is default.
// FIXME: Consider bumping this to 19 (MSVC2015) soon.
MSVT = VersionTuple(18);
}
return MSVT;
}
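// Illustrative precedence: an explicit compatibility-version flag (handled
// by the base ToolChain::computeMSVCVersion) wins; then a versioned triple
// such as "x86_64-pc-windows-msvc19.11.0"; then the version stamped into
// cl.exe in the detected bin directory; finally, with -fms-extensions in
// effect, the 18.00 default.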
std::string
MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
// The MSVC version doesn't care about the architecture, even though it
// may look at the triple internally.
VersionTuple MSVT = computeMSVCVersion(/*D=*/nullptr, Args);
MSVT = VersionTuple(MSVT.getMajor(), MSVT.getMinor().getValueOr(0),
MSVT.getSubminor().getValueOr(0));
// For the rest of the triple, however, a computed architecture name may
// be needed.
llvm::Triple Triple(ToolChain::ComputeEffectiveClangTriple(Args, InputType));
if (Triple.getEnvironment() == llvm::Triple::MSVC) {
StringRef ObjFmt = Triple.getEnvironmentName().split('-').second;
if (ObjFmt.empty())
Triple.setEnvironmentName((Twine("msvc") + MSVT.getAsString()).str());
else
Triple.setEnvironmentName(
(Twine("msvc") + MSVT.getAsString() + Twine('-') + ObjFmt).str());
}
return Triple.getTriple();
}
SanitizerMask MSVCToolChain::getSupportedSanitizers() const {
SanitizerMask Res = ToolChain::getSupportedSanitizers();
Res |= SanitizerKind::Address;
return Res;
}
static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
bool SupportsForcingFramePointer,
const char *ExpandChar, const OptTable &Opts) {
assert(A->getOption().matches(options::OPT__SLASH_O));
StringRef OptStr = A->getValue();
for (size_t I = 0, E = OptStr.size(); I != E; ++I) {
const char &OptChar = *(OptStr.data() + I);
switch (OptChar) {
default:
break;
case '1':
case '2':
case 'x':
case 'd':
if (&OptChar == ExpandChar) {
if (OptChar == 'd') {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_O0));
} else {
if (OptChar == '1') {
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s");
} else if (OptChar == '2' || OptChar == 'x') {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin));
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2");
}
if (SupportsForcingFramePointer &&
!DAL.hasArgNoClaim(options::OPT_fno_omit_frame_pointer))
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_fomit_frame_pointer));
if (OptChar == '1' || OptChar == '2')
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_ffunction_sections));
}
}
break;
case 'b':
if (I + 1 != E && isdigit(OptStr[I + 1])) {
switch (OptStr[I + 1]) {
case '0':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_inline));
break;
case '1':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_hint_functions));
break;
case '2':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_functions));
break;
}
++I;
}
break;
case 'g':
break;
case 'i':
if (I + 1 != E && OptStr[I + 1] == '-') {
++I;
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin));
} else {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin));
}
break;
case 's':
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s");
break;
case 't':
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2");
break;
case 'y': {
bool OmitFramePointer = true;
if (I + 1 != E && OptStr[I + 1] == '-') {
OmitFramePointer = false;
++I;
}
if (SupportsForcingFramePointer) {
if (OmitFramePointer)
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_fomit_frame_pointer));
else
DAL.AddFlagArg(
A, Opts.getOption(options::OPT_fno_omit_frame_pointer));
} else {
// Don't warn about /Oy- in 64-bit builds (where
// SupportsForcingFramePointer is false). The flag having no effect
// there is a compiler-internal optimization, and people shouldn't have
// to special-case their build files for 64-bit clang-cl.
A->claim();
}
break;
}
}
}
}
static void TranslateDArg(Arg *A, llvm::opt::DerivedArgList &DAL,
const OptTable &Opts) {
assert(A->getOption().matches(options::OPT_D));
StringRef Val = A->getValue();
size_t Hash = Val.find('#');
if (Hash == StringRef::npos || Hash > Val.find('=')) {
DAL.append(A);
return;
}
std::string NewVal = Val;
NewVal[Hash] = '=';
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_D), NewVal);
}
llvm::opt::DerivedArgList *
MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch, Action::OffloadKind) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// /Oy and /Oy- only have an effect under X86-32.
bool SupportsForcingFramePointer = getArch() == llvm::Triple::x86;
// The -O[12xd] flag actually expands to several flags. We must desugar the
// flags so that the embedded options can be negated. For example, the '-O2'
// flag enables '-Oy'. Expanding '-O2' into its constituent flags allows us to
// correctly handle '-O2 -Oy-', where the trailing '-Oy-' disables a single
// aspect of '-O2'.
//
// Note that this expansion logic only applies to the *last* of '[12xd]'.
// First step is to search for the character we'd like to expand.
const char *ExpandChar = nullptr;
for (Arg *A : Args) {
if (!A->getOption().matches(options::OPT__SLASH_O))
continue;
StringRef OptStr = A->getValue();
for (size_t I = 0, E = OptStr.size(); I != E; ++I) {
char OptChar = OptStr[I];
char PrevChar = I > 0 ? OptStr[I - 1] : '0';
if (PrevChar == 'b') {
// OptChar does not expand; it's an argument to the previous char.
continue;
}
if (OptChar == '1' || OptChar == '2' || OptChar == 'x' || OptChar == 'd')
ExpandChar = OptStr.data() + I;
}
}
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT__SLASH_O)) {
// The -O flag actually takes an amalgam of other options. For example,
// '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'.
TranslateOptArg(A, *DAL, SupportsForcingFramePointer, ExpandChar, Opts);
} else if (A->getOption().matches(options::OPT_D)) {
// Translate -Dfoo#bar into -Dfoo=bar.
TranslateDArg(A, *DAL, Opts);
} else {
DAL->append(A);
}
}
return DAL;
}
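// A worked desugaring, under the expansion rule above (illustrative):
// on x86, "/O2" (with '2' the last of [12xd]) becomes
//   -fbuiltin -O2 -fomit-frame-pointer -ffunction-sections
// and a trailing "/Oy-" then appends -fno-omit-frame-pointer, which wins as
// the later flag. On x86-64, "/Oy-" is claimed silently instead, since
// forcing the frame pointer is unsupported there.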
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h (revision 322855)
@@ -1,141 +1,146 @@
//===--- MSVC.h - MSVC ToolChain Implementations ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#include "Cuda.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
namespace tools {
/// Visual studio tools.
namespace visualstudio {
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
Linker(const ToolChain &TC)
: Tool("visualstudio::Linker", "linker", TC, RF_Full,
llvm::sys::WEM_UTF16) {}
bool hasIntegratedCPP() const override { return false; }
bool isLinkJob() const override { return true; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
class LLVM_LIBRARY_VISIBILITY Compiler : public Tool {
public:
Compiler(const ToolChain &TC)
: Tool("visualstudio::Compiler", "compiler", TC, RF_Full,
llvm::sys::WEM_UTF16) {}
bool hasIntegratedAssembler() const override { return true; }
bool hasIntegratedCPP() const override { return true; }
bool isLinkJob() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
std::unique_ptr<Command> GetCommand(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const;
};
} // end namespace visualstudio
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
public:
MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
bool IsIntegratedAssemblerDefault() const override;
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
bool isPIEDefault() const override;
bool isPICDefaultForced() const override;
enum class SubDirectoryType {
Bin,
Include,
Lib,
};
std::string getSubDirectoryPath(SubDirectoryType Type,
llvm::Triple::ArchType TargetArch) const;
// Convenience overload.
// Uses the current target arch.
std::string getSubDirectoryPath(SubDirectoryType Type) const {
return getSubDirectoryPath(Type, getArch());
}
- bool getIsVS2017OrNewer() const { return IsVS2017OrNewer; }
+ enum class ToolsetLayout {
+ OlderVS,
+ VS2017OrNewer,
+ DevDivInternal,
+ };
+ bool getIsVS2017OrNewer() const { return VSLayout == ToolsetLayout::VS2017OrNewer; }
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
bool getWindowsSDKLibraryPath(std::string &path) const;
/// \brief Check if Universal CRT should be used if available
bool getUniversalCRTLibraryPath(std::string &path) const;
bool useUniversalCRT() const;
VersionTuple
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const override;
SanitizerMask getSupportedSanitizers() const override;
void printVerboseInfo(raw_ostream &OS) const override;
protected:
void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const std::string &folder,
const Twine &subfolder1,
const Twine &subfolder2 = "",
const Twine &subfolder3 = "") const;
Tool *buildLinker() const override;
Tool *buildAssembler() const override;
private:
std::string VCToolChainPath;
- bool IsVS2017OrNewer = false;
+ ToolsetLayout VSLayout = ToolsetLayout::OlderVS;
CudaInstallationDetector CudaInstallation;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
Index: head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp (revision 322855)
@@ -1,701 +1,706 @@
//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements WhitespaceManager class.
///
//===----------------------------------------------------------------------===//
#include "WhitespaceManager.h"
#include "llvm/ADT/STLExtras.h"
namespace clang {
namespace format {
bool WhitespaceManager::Change::IsBeforeInFile::
operator()(const Change &C1, const Change &C2) const {
return SourceMgr.isBeforeInTranslationUnit(
C1.OriginalWhitespaceRange.getBegin(),
C2.OriginalWhitespaceRange.getBegin());
}
WhitespaceManager::Change::Change(const FormatToken &Tok,
bool CreateReplacement,
SourceRange OriginalWhitespaceRange,
int Spaces, unsigned StartOfTokenColumn,
unsigned NewlinesBefore,
StringRef PreviousLinePostfix,
StringRef CurrentLinePrefix,
bool ContinuesPPDirective, bool IsInsideToken)
: Tok(&Tok), CreateReplacement(CreateReplacement),
OriginalWhitespaceRange(OriginalWhitespaceRange),
StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
PreviousLinePostfix(PreviousLinePostfix),
CurrentLinePrefix(CurrentLinePrefix),
ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
StartOfBlockComment(nullptr), IndentationOffset(0) {}
void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
unsigned Spaces,
unsigned StartOfTokenColumn,
bool InPPDirective) {
if (Tok.Finalized)
return;
Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
Spaces, StartOfTokenColumn, Newlines, "", "",
InPPDirective && !Tok.IsFirst,
/*IsInsideToken=*/false));
}
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
if (Tok.Finalized)
return;
Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
Tok.WhitespaceRange, /*Spaces=*/0,
Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
InPPDirective && !Tok.IsFirst,
/*IsInsideToken=*/false));
}
void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
unsigned Newlines, int Spaces) {
if (Tok.Finalized)
return;
SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
Changes.push_back(
Change(Tok, /*CreateReplacement=*/true,
SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
}
const tooling::Replacements &WhitespaceManager::generateReplacements() {
if (Changes.empty())
return Replaces;
std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
calculateLineBreakInformation();
alignConsecutiveDeclarations();
alignConsecutiveAssignments();
alignTrailingComments();
alignEscapedNewlines();
generateChanges();
return Replaces;
}
void WhitespaceManager::calculateLineBreakInformation() {
Changes[0].PreviousEndOfTokenColumn = 0;
Change *LastOutsideTokenChange = &Changes[0];
for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
SourceLocation OriginalWhitespaceStart =
Changes[i].OriginalWhitespaceRange.getBegin();
SourceLocation PreviousOriginalWhitespaceEnd =
Changes[i - 1].OriginalWhitespaceRange.getEnd();
unsigned OriginalWhitespaceStartOffset =
SourceMgr.getFileOffset(OriginalWhitespaceStart);
unsigned PreviousOriginalWhitespaceEndOffset =
SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
assert(PreviousOriginalWhitespaceEndOffset <=
OriginalWhitespaceStartOffset);
const char *const PreviousOriginalWhitespaceEndData =
SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
StringRef Text(PreviousOriginalWhitespaceEndData,
SourceMgr.getCharacterData(OriginalWhitespaceStart) -
PreviousOriginalWhitespaceEndData);
// Usually consecutive changes would occur in consecutive tokens. This is
// not the case, however, when analyzing some preprocessor runs of the
// annotated lines. For example, in this code:
//
// #if A // line 1
// int i = 1;
// #else B // line 2
// int i = 2;
// #endif // line 3
//
// one of the runs will produce the sequence of lines marked with line 1, 2
// and 3. So the two consecutive whitespace changes just before '// line 2'
// and before '#endif // line 3' span multiple lines and tokens:
//
// #else B{change X}[// line 2
// int i = 2;
// ]{change Y}#endif // line 3
//
// For this reason, if the text between consecutive changes spans multiple
// newlines, the token length must be adjusted to the end of the original
// line of the token.
auto NewlinePos = Text.find_first_of('\n');
if (NewlinePos == StringRef::npos) {
Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
PreviousOriginalWhitespaceEndOffset +
Changes[i].PreviousLinePostfix.size() +
Changes[i - 1].CurrentLinePrefix.size();
} else {
Changes[i - 1].TokenLength =
NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
}
// If there are multiple changes in this token, sum up all the changes until
// the end of the line.
if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
LastOutsideTokenChange->TokenLength +=
Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
else
LastOutsideTokenChange = &Changes[i - 1];
Changes[i].PreviousEndOfTokenColumn =
Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
Changes[i - 1].IsTrailingComment =
(Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
(Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
Changes[i - 1].Tok->is(tok::comment) &&
// FIXME: This is a dirty hack. The problem is that
// BreakableLineCommentSection does comment reflow changes and here is
// the aligning of trailing comments. Consider the case where we reflow
// the second line up in this example:
//
// // line 1
// // line 2
//
// That amounts to 2 changes by BreakableLineCommentSection:
// - the first, delimited by (), for the whitespace between the tokens,
// - and second, delimited by [], for the whitespace at the beginning
// of the second token:
//
// // line 1(
// )[// ]line 2
//
// So in the end we have two changes like this:
//
// // line1()[ ]line 2
//
// Note that the OriginalWhitespaceStart of the second change is the
// same as the PreviousOriginalWhitespaceEnd of the first change.
// In this case, the below check ensures that the second change doesn't
// get treated as a trailing comment change here, since this might
// trigger additional whitespace to be wrongly inserted before "line 2"
// by the comment aligner here.
//
// For a proper solution we need a mechanism to say to WhitespaceManager
// that a particular change breaks the current sequence of trailing
// comments.
OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
}
// FIXME: The last token is currently not always an eof token; in those
// cases, setting TokenLength of the last token to 0 is wrong.
Changes.back().TokenLength = 0;
Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
const WhitespaceManager::Change *LastBlockComment = nullptr;
for (auto &Change : Changes) {
// Reset the IsTrailingComment flag for changes inside of trailing comments
// so they don't get realigned later. Comment line breaks, however, still
// need to be aligned.
if (Change.IsInsideToken && Change.NewlinesBefore == 0)
Change.IsTrailingComment = false;
Change.StartOfBlockComment = nullptr;
Change.IndentationOffset = 0;
if (Change.Tok->is(tok::comment)) {
if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
LastBlockComment = &Change;
else {
if ((Change.StartOfBlockComment = LastBlockComment))
Change.IndentationOffset =
Change.StartOfTokenColumn -
Change.StartOfBlockComment->StartOfTokenColumn;
}
} else {
LastBlockComment = nullptr;
}
}
}
// Align a single sequence of tokens, see AlignTokens below.
template <typename F>
static void
AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes) {
bool FoundMatchOnLine = false;
int Shift = 0;
// ScopeStack keeps track of the current scope depth. It contains indices of
// the first token on each scope.
// We only run the "Matches" function on tokens from the outer-most scope.
// However, we do need to pay special attention to one class of tokens
// that are not in the outer-most scope, and that is function parameters
// which are split across multiple lines, as illustrated by this example:
// double a(int x);
// int b(int y,
// double z);
// In the above example, we need to take special care to ensure that
// 'double z' is indented along with its owning function 'b'.
SmallVector<unsigned, 16> ScopeStack;
for (unsigned i = Start; i != End; ++i) {
if (ScopeStack.size() != 0 &&
Changes[i].nestingAndIndentLevel() <
Changes[ScopeStack.back()].nestingAndIndentLevel())
ScopeStack.pop_back();
if (i != Start && Changes[i].nestingAndIndentLevel() >
Changes[i - 1].nestingAndIndentLevel())
ScopeStack.push_back(i);
bool InsideNestedScope = ScopeStack.size() != 0;
if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
Shift = 0;
FoundMatchOnLine = false;
}
// If this is the first matching token to be aligned, remember by how many
// spaces it has to be shifted, so the rest of the changes on the line are
// shifted by the same amount.
if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
FoundMatchOnLine = true;
Shift = Column - Changes[i].StartOfTokenColumn;
Changes[i].Spaces += Shift;
}
// This is for function parameters that are split across multiple lines,
// as mentioned in the ScopeStack comment.
if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
unsigned ScopeStart = ScopeStack.back();
if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
(ScopeStart > Start + 1 &&
Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
Changes[i].Spaces += Shift;
}
assert(Shift >= 0);
Changes[i].StartOfTokenColumn += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
}
}
// Walk through a subset of the changes, starting at StartAt, and find
// sequences of matching tokens to align. To do so, keep track of the lines and
// whether or not a matching token was found on a line. If a matching token is
// found, extend the current sequence. If the current line cannot be part of a
// sequence, e.g. because there is an empty line before it or it contains only
// non-matching tokens, finalize the previous sequence.
// The value returned is the token on which we stopped, either because we
// exhausted all items inside Changes, or because we hit a scope level higher
// than our initial scope.
// This function is recursive. Each invocation processes only the scope level
// equal to the initial level, which is the level of Changes[StartAt].
// If we encounter a scope level greater than the initial level, then we call
// ourselves recursively, thereby avoiding the pollution of the current state
// with the alignment requirements of the nested sub-level. This recursive
// behavior is necessary for aligning function prototypes that have one or more
// arguments.
// If this function encounters a scope level less than the initial level,
// it returns the current position.
// There is a non-obvious subtlety in the recursive behavior: Even though we
// defer processing of nested levels to recursive invocations of this
// function, when it comes time to align a sequence of tokens, we run the
// alignment on the entire sequence, including the nested levels.
// When doing so, most of the nested tokens are skipped, because their
// alignment was already handled by the recursive invocations of this function.
// However, the special exception is that we do NOT skip function parameters
// that are split across multiple lines. See the test case in FormatTest.cpp
// that mentions "split function parameter alignment" for an example of this.
template <typename F>
static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes,
unsigned StartAt) {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
// Line number of the start and the end of the current token sequence.
unsigned StartOfSequence = 0;
unsigned EndOfSequence = 0;
// Measure the scope level (i.e. depth of (), [], {}) of the first token, and
// abort when we hit any token in a higher scope than the starting one.
auto NestingAndIndentLevel = StartAt < Changes.size()
? Changes[StartAt].nestingAndIndentLevel()
: std::pair<unsigned, unsigned>(0, 0);
// Keep track of the number of commas before the matching tokens; we will only
// align a sequence of matching tokens if they are preceded by the same number
// of commas.
unsigned CommasBeforeLastMatch = 0;
unsigned CommasBeforeMatch = 0;
// Whether a matching token has been found on the current line.
bool FoundMatchOnLine = false;
// Aligns a sequence of matching tokens on the MinColumn column.
//
// Sequences start from the first matching token to align, and end at the
// first token of the first line that doesn't need to be aligned.
//
// We need to adjust the StartOfTokenColumn of each Change that is on a line
// containing any matching token to be aligned and located after such token.
auto AlignCurrentSequence = [&] {
if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
Changes);
MinColumn = 0;
MaxColumn = UINT_MAX;
StartOfSequence = 0;
EndOfSequence = 0;
};
unsigned i = StartAt;
for (unsigned e = Changes.size(); i != e; ++i) {
if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel)
break;
if (Changes[i].NewlinesBefore != 0) {
CommasBeforeMatch = 0;
EndOfSequence = i;
// If there is a blank line, or if the last line didn't contain any
// matching token, the sequence ends here.
if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
AlignCurrentSequence();
FoundMatchOnLine = false;
}
if (Changes[i].Tok->is(tok::comma)) {
++CommasBeforeMatch;
} else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) {
// Call AlignTokens recursively, skipping over this scope block.
unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
i = StoppedAt - 1;
continue;
}
if (!Matches(Changes[i]))
continue;
// If there is more than one matching token per line, or if the number of
// preceding commas does not match anymore, end the sequence.
if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
AlignCurrentSequence();
CommasBeforeLastMatch = CommasBeforeMatch;
FoundMatchOnLine = true;
if (StartOfSequence == 0)
StartOfSequence = i;
unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
int LineLengthAfter = -Changes[i].Spaces;
for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
// If we are restricted by the maximum column width, end the sequence.
if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
CommasBeforeLastMatch != CommasBeforeMatch) {
AlignCurrentSequence();
StartOfSequence = i;
}
MinColumn = std::max(MinColumn, ChangeMinColumn);
MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
}
EndOfSequence = i;
AlignCurrentSequence();
return i;
}
void WhitespaceManager::alignConsecutiveAssignments() {
if (!Style.AlignConsecutiveAssignments)
return;
AlignTokens(Style,
[&](const Change &C) {
// Do not align on equal signs that are first on a line.
if (C.NewlinesBefore > 0)
return false;
// Do not align on equal signs that are last on a line.
if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
return false;
return C.Tok->is(tok::equal);
},
Changes, /*StartAt=*/0);
}
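// Illustrative before/after for AlignConsecutiveAssignments:
//   int a = 1;              int a        = 1;
//   somelongname = 2;   ->  somelongname = 2;
//   double c = 3;           double c     = 3;
// The '=' tokens shift right to MinColumn, and everything after them on
// each line shifts by the same amount.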
void WhitespaceManager::alignConsecutiveDeclarations() {
if (!Style.AlignConsecutiveDeclarations)
return;
// FIXME: Currently we don't handle properly the PointerAlignment: Right
// The * and & are not aligned and are left dangling. Something has to be done
// about it, but it raises the question of alignment of code like:
// const char* const* v1;
// float const* v2;
// SomeVeryLongType const& v3;
AlignTokens(Style,
[](Change const &C) {
// tok::kw_operator is necessary for aligning operator overload
// definitions.
return C.Tok->is(TT_StartOfName) ||
C.Tok->is(TT_FunctionDeclarationName) ||
C.Tok->is(tok::kw_operator);
},
Changes, /*StartAt=*/0);
}
void WhitespaceManager::alignTrailingComments() {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
unsigned StartOfSequence = 0;
bool BreakBeforeNext = false;
unsigned Newlines = 0;
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
if (Changes[i].StartOfBlockComment)
continue;
Newlines += Changes[i].NewlinesBefore;
if (!Changes[i].IsTrailingComment)
continue;
unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
- unsigned ChangeMaxColumn = Style.ColumnLimit >= Changes[i].TokenLength
- ? Style.ColumnLimit - Changes[i].TokenLength
- : ChangeMinColumn;
+ unsigned ChangeMaxColumn;
+
+ if (Style.ColumnLimit == 0)
+ ChangeMaxColumn = UINT_MAX;
+ else if (Style.ColumnLimit >= Changes[i].TokenLength)
+ ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
+ else
+ ChangeMaxColumn = ChangeMinColumn;
// If we don't create a replacement for this change, we have to consider
// it to be immovable.
if (!Changes[i].CreateReplacement)
ChangeMaxColumn = ChangeMinColumn;
if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
ChangeMaxColumn -= 2;
// If this comment follows a '}' in column 0, it probably documents the
// closing of a namespace and we don't want to align it.
bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
Changes[i - 1].Tok->is(tok::r_brace) &&
Changes[i - 1].StartOfTokenColumn == 0;
bool WasAlignedWithStartOfNextLine = false;
if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
Changes[i].OriginalWhitespaceRange.getEnd());
for (unsigned j = i + 1; j != e; ++j) {
if (Changes[j].Tok->is(tok::comment))
continue;
unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
Changes[j].OriginalWhitespaceRange.getEnd());
// The start of the next token was previously aligned with the
// start of this comment.
WasAlignedWithStartOfNextLine =
CommentColumn == NextColumn ||
CommentColumn == NextColumn + Style.IndentWidth;
break;
}
}
if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
alignTrailingComments(StartOfSequence, i, MinColumn);
MinColumn = ChangeMinColumn;
MaxColumn = ChangeMinColumn;
StartOfSequence = i;
} else if (BreakBeforeNext || Newlines > 1 ||
(ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
// Break the comment sequence if the previous line did not end
// in a trailing comment.
(Changes[i].NewlinesBefore == 1 && i > 0 &&
!Changes[i - 1].IsTrailingComment) ||
WasAlignedWithStartOfNextLine) {
alignTrailingComments(StartOfSequence, i, MinColumn);
MinColumn = ChangeMinColumn;
MaxColumn = ChangeMaxColumn;
StartOfSequence = i;
} else {
MinColumn = std::max(MinColumn, ChangeMinColumn);
MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
}
BreakBeforeNext =
(i == 0) || (Changes[i].NewlinesBefore > 1) ||
// Never start a sequence with a comment at the beginning of
// the line.
(Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
Newlines = 0;
}
alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
}
void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
unsigned Column) {
for (unsigned i = Start; i != End; ++i) {
int Shift = 0;
if (Changes[i].IsTrailingComment) {
Shift = Column - Changes[i].StartOfTokenColumn;
}
if (Changes[i].StartOfBlockComment) {
Shift = Changes[i].IndentationOffset +
Changes[i].StartOfBlockComment->StartOfTokenColumn -
Changes[i].StartOfTokenColumn;
}
assert(Shift >= 0);
Changes[i].Spaces += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
Changes[i].StartOfTokenColumn += Shift;
}
}
void WhitespaceManager::alignEscapedNewlines() {
if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
return;
bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
unsigned StartOfMacro = 0;
for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
Change &C = Changes[i];
if (C.NewlinesBefore > 0) {
if (C.ContinuesPPDirective) {
MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
} else {
alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
StartOfMacro = i;
}
}
}
alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
}
void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
unsigned Column) {
for (unsigned i = Start; i < End; ++i) {
Change &C = Changes[i];
if (C.NewlinesBefore > 0) {
assert(C.ContinuesPPDirective);
if (C.PreviousEndOfTokenColumn + 1 > Column)
C.EscapedNewlineColumn = 0;
else
C.EscapedNewlineColumn = Column;
}
}
}
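// Illustrative effect: within a single macro, continuation backslashes are
// pulled to one common column -- just past the longest line for ENAS_Left,
// or out at the column limit for ENAS_Right. Sketch (ENAS_Left):
//   #define A    \           #define A  \
//     int aa; \        ->      int aa;  \
//     f(aa)                    f(aa)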
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
if (i > 0) {
assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
C.OriginalWhitespaceRange.getBegin() &&
"Generating two replacements for the same location");
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
if (C.ContinuesPPDirective)
appendNewlineText(ReplacementText, C.NewlinesBefore,
C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
else
appendNewlineText(ReplacementText, C.NewlinesBefore);
appendIndentText(ReplacementText, C.Tok->IndentLevel,
std::max(0, C.Spaces),
C.StartOfTokenColumn - std::max(0, C.Spaces));
ReplacementText.append(C.CurrentLinePrefix);
storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
}
}
}
void WhitespaceManager::storeReplacement(SourceRange Range,
StringRef Text) {
unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
SourceMgr.getFileOffset(Range.getBegin());
// Don't create a replacement if it does not change anything.
if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
WhitespaceLength) == Text)
return;
auto Err = Replaces.add(tooling::Replacement(
SourceMgr, CharSourceRange::getCharRange(Range), Text));
// FIXME: better error handling. For now, just print an error message in the
// release version.
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
assert(false);
}
}
void WhitespaceManager::appendNewlineText(std::string &Text,
unsigned Newlines) {
for (unsigned i = 0; i < Newlines; ++i)
Text.append(UseCRLF ? "\r\n" : "\n");
}
void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
unsigned PreviousEndOfTokenColumn,
unsigned EscapedNewlineColumn) {
if (Newlines > 0) {
unsigned Offset =
std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);
for (unsigned i = 0; i < Newlines; ++i) {
Text.append(EscapedNewlineColumn - Offset - 1, ' ');
Text.append(UseCRLF ? "\\\r\n" : "\\\n");
Offset = 0;
}
}
}
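// Worked numbers (illustrative): with EscapedNewlineColumn == 11 and
// PreviousEndOfTokenColumn == 6, Offset starts at min(9, 6) == 6, so the
// first continuation gets 11 - 6 - 1 == 4 spaces before its "\"; every
// later one uses Offset == 0 and gets the full 11 - 0 - 1 == 10 spaces.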
void WhitespaceManager::appendIndentText(std::string &Text,
unsigned IndentLevel, unsigned Spaces,
unsigned WhitespaceStartColumn) {
switch (Style.UseTab) {
case FormatStyle::UT_Never:
Text.append(Spaces, ' ');
break;
case FormatStyle::UT_Always: {
unsigned FirstTabWidth =
Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
// Indent with tabs only when there's at least one full tab.
if (FirstTabWidth + Style.TabWidth <= Spaces) {
Spaces -= FirstTabWidth;
Text.append("\t");
}
Text.append(Spaces / Style.TabWidth, '\t');
Text.append(Spaces % Style.TabWidth, ' ');
break;
}
case FormatStyle::UT_ForIndentation:
if (WhitespaceStartColumn == 0) {
unsigned Indentation = IndentLevel * Style.IndentWidth;
// This happens, e.g. when a line in a block comment is indented less than
// the first one.
if (Indentation > Spaces)
Indentation = Spaces;
unsigned Tabs = Indentation / Style.TabWidth;
Text.append(Tabs, '\t');
Spaces -= Tabs * Style.TabWidth;
}
Text.append(Spaces, ' ');
break;
case FormatStyle::UT_ForContinuationAndIndentation:
if (WhitespaceStartColumn == 0) {
unsigned Tabs = Spaces / Style.TabWidth;
Text.append(Tabs, '\t');
Spaces -= Tabs * Style.TabWidth;
}
Text.append(Spaces, ' ');
break;
}
}
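// Worked numbers for UT_Always (illustrative): with TabWidth == 8,
// WhitespaceStartColumn == 5, and Spaces == 14, FirstTabWidth == 8 - 5 % 8
// == 3. Since 3 + 8 <= 14, the first tab consumes 3 spaces, leaving 11,
// which emit 11 / 8 == 1 further tab and 11 % 8 == 3 trailing spaces.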
} // namespace format
} // namespace clang
Index: head/contrib/llvm/tools/clang/lib/Headers/unwind.h
===================================================================
--- head/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 322855)
@@ -1,337 +1,299 @@
/*===---- unwind.h - Stack unwinding ----------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
#ifndef __CLANG_UNWIND_H
#define __CLANG_UNWIND_H
#if defined(__APPLE__) && __has_include_next(<unwind.h>)
/* Darwin (from 11.x on) provides an unwind.h. If that's available,
* use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
* so define that around the include.*/
# ifndef _GNU_SOURCE
# define _SHOULD_UNDEFINE_GNU_SOURCE
# define _GNU_SOURCE
# endif
// libunwind's unwind.h reflects the current visibility. However, Mozilla
// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the
// visibility to default and export its contents. gcc also allows users to
// override its override by #defining HIDE_EXPORTS (but note, this only obeys
// the user's -fvisibility setting; it doesn't hide any exports on its own). We
// imitate gcc's header here:
# ifdef HIDE_EXPORTS
# include_next <unwind.h>
# else
# pragma GCC visibility push(default)
# include_next <unwind.h>
# pragma GCC visibility pop
# endif
# ifdef _SHOULD_UNDEFINE_GNU_SOURCE
# undef _GNU_SOURCE
# undef _SHOULD_UNDEFINE_GNU_SOURCE
# endif
#else
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* It is a bit strange for a header to play with the visibility of the
symbols it declares, but this matches gcc's behavior and some programs
depend on it. */
#ifndef HIDE_EXPORTS
#pragma GCC visibility push(default)
#endif
typedef uintptr_t _Unwind_Word;
typedef intptr_t _Unwind_Sword;
typedef uintptr_t _Unwind_Ptr;
typedef uintptr_t _Unwind_Internal_Ptr;
typedef uint64_t _Unwind_Exception_Class;
typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context;
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
-struct _Unwind_Control_Block;
-typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
-#else
struct _Unwind_Exception;
-typedef struct _Unwind_Exception _Unwind_Exception;
-#endif
typedef enum {
_URC_NO_REASON = 0,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
!defined(__ARM_DWARF_EH__)
_URC_OK = 0, /* used by ARM EHABI */
#endif
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
_URC_FATAL_PHASE2_ERROR = 2,
_URC_FATAL_PHASE1_ERROR = 3,
_URC_NORMAL_STOP = 4,
_URC_END_OF_STACK = 5,
_URC_HANDLER_FOUND = 6,
_URC_INSTALL_CONTEXT = 7,
_URC_CONTINUE_UNWIND = 8,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
!defined(__ARM_DWARF_EH__)
_URC_FAILURE = 9 /* used by ARM EHABI */
#endif
} _Unwind_Reason_Code;
typedef enum {
_UA_SEARCH_PHASE = 1,
_UA_CLEANUP_PHASE = 2,
_UA_HANDLER_FRAME = 4,
_UA_FORCE_UNWIND = 8,
_UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
- _Unwind_Exception *);
+ struct _Unwind_Exception *);
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
-typedef struct _Unwind_Control_Block _Unwind_Control_Block;
-typedef uint32_t _Unwind_EHT_Header;
-
-struct _Unwind_Control_Block {
- uint64_t exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
- /* unwinder cache (private fields for the unwinder's use) */
- struct {
- uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
- uint32_t reserved2; /* personality routine */
- uint32_t reserved3; /* callsite */
- uint32_t reserved4; /* forced unwind stop argument */
- uint32_t reserved5;
- } unwinder_cache;
- /* propagation barrier cache (valid after phase 1) */
- struct {
- uint32_t sp;
- uint32_t bitpattern[5];
- } barrier_cache;
- /* cleanup cache (preserved over cleanup) */
- struct {
- uint32_t bitpattern[4];
- } cleanup_cache;
- /* personality cache (for personality's benefit) */
- struct {
- uint32_t fnstart; /* function start address */
- _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
- uint32_t additional; /* additional data */
- uint32_t reserved1;
- } pr_cache;
- long long int : 0; /* force alignment of next item to 8-byte boundary */
-};
-#else
struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
_Unwind_Word private_1;
_Unwind_Word private_2;
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
* aligned". GCC has interpreted this to mean "use the maximum useful
* alignment for the target"; so do we. */
} __attribute__((__aligned__));
-#endif
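/* Illustrative sketch, not part of the original header: under the
   "maximum useful alignment" reading above, a check along the lines of
     _Static_assert(_Alignof(struct _Unwind_Exception) == __BIGGEST_ALIGNMENT__,
                    "Itanium ABI double-word alignment");
   would be expected to hold on targets that follow gcc's interpretation. */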
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
- _Unwind_Exception *,
+ struct _Unwind_Exception *,
struct _Unwind_Context *,
void *);
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
- _Unwind_Exception_Class,
- _Unwind_Exception *,
- struct _Unwind_Context *);
+typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
+ int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
+ struct _Unwind_Context *);
typedef _Unwind_Personality_Fn __personality_routine;
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
void *);
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
+#if defined(__arm__) && !defined(__APPLE__)
+
typedef enum {
_UVRSC_CORE = 0, /* integer register */
_UVRSC_VFP = 1, /* vfp */
_UVRSC_WMMXD = 3, /* Intel WMMX data register */
_UVRSC_WMMXC = 4 /* Intel WMMX control register */
} _Unwind_VRS_RegClass;
typedef enum {
_UVRSD_UINT32 = 0,
_UVRSD_VFPX = 1,
_UVRSD_UINT64 = 3,
_UVRSD_FLOAT = 4,
_UVRSD_DOUBLE = 5
} _Unwind_VRS_DataRepresentation;
typedef enum {
_UVRSR_OK = 0,
_UVRSR_NOT_IMPLEMENTED = 1,
_UVRSR_FAILED = 2
} _Unwind_VRS_Result;
+#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
typedef uint32_t _Unwind_State;
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
#define _US_ACTION_MASK ((_Unwind_State)3)
#define _US_FORCE_UNWIND ((_Unwind_State)8)
+#endif
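/* Usage sketch (an assumption, not part of this header): an EHABI
   personality routine conventionally dispatches on the low action bits and
   tests the force-unwind flag separately:
     switch (state & _US_ACTION_MASK) {
     case _US_VIRTUAL_UNWIND_FRAME:  break;  -- phase-1 search
     case _US_UNWIND_FRAME_STARTING: break;  -- phase-2 cleanup start
     case _US_UNWIND_FRAME_RESUME:   break;  -- phase-2 resume
     }
     if (state & _US_FORCE_UNWIND) { never claim a handler frame }
*/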
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
_Unwind_VRS_DataRepresentation __representation,
void *__valuep);
_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
_Unwind_VRS_DataRepresentation __representation,
void *__valuep);
static __inline__
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {
_Unwind_Word __value;
_Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
return __value;
}
static __inline__
void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,
_Unwind_Word __value) {
_Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
}
static __inline__
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {
_Unwind_Word __ip = _Unwind_GetGR(__context, 15);
return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */
}
static __inline__
void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {
_Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;
_Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);
}
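/* Example: on ARM, a Thumb-state return address such as 0x8001 refers to the
   instruction at 0x8000; _Unwind_GetIP above strips bit 0, and _Unwind_SetIP
   preserves it, so the Thumb/ARM state survives a get/set round trip. */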
#else
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);
void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);
void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);
#endif
_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);
void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
/* DWARF EH functions; currently not available on Darwin/ARM */
#if !defined(__APPLE__) || !defined(__arm__)
-_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
- void *);
-void _Unwind_DeleteException(_Unwind_Exception *);
-void _Unwind_Resume(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
+ _Unwind_Stop_Fn, void *);
+void _Unwind_DeleteException(struct _Unwind_Exception *);
+void _Unwind_Resume(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
+
#endif
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
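/* Minimal usage sketch (an assumption, not part of this header): a trace
   callback that visits each frame's PC. Returning _URC_NO_REASON asks the
   unwinder to continue to the next frame; any other value conventionally
   stops the walk.
     static _Unwind_Reason_Code note_frame(struct _Unwind_Context *ctx,
                                           void *arg) {
       _Unwind_Word ip = _Unwind_GetIP(ctx);
       (void)arg; (void)ip;
       return _URC_NO_REASON;
     }
   invoked as _Unwind_Backtrace(note_frame, &some_buffer). */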
/* setjmp(3)/longjmp(3) stuff */
typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
-_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
+_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
-void _Unwind_SjLj_Resume(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
+void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
void *_Unwind_FindEnclosingFunction(void *);
#ifdef __APPLE__
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
__attribute__((__unavailable__));
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
__attribute__((__unavailable__));
/* Darwin-specific functions */
void __register_frame(const void *);
void __deregister_frame(const void *);
struct dwarf_eh_bases {
uintptr_t tbase;
uintptr_t dbase;
uintptr_t func;
};
void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
void __register_frame_info_bases(const void *, void *, void *, void *)
__attribute__((__unavailable__));
void __register_frame_info(const void *, void *) __attribute__((__unavailable__));
void __register_frame_info_table_bases(const void *, void*, void *, void *)
__attribute__((__unavailable__));
void __register_frame_info_table(const void *, void *)
__attribute__((__unavailable__));
void __register_frame_table(const void *) __attribute__((__unavailable__));
void __deregister_frame_info(const void *) __attribute__((__unavailable__));
void __deregister_frame_info_bases(const void *) __attribute__((__unavailable__));
#else
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);
#endif
#ifndef HIDE_EXPORTS
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif
#endif
#endif /* __CLANG_UNWIND_H */
Index: head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp (revision 322855)
@@ -1,828 +1,839 @@
//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements pieces of the Preprocessor interface that manage the
// current lexer stack.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PTHManager.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
using namespace clang;
PPCallbacks::~PPCallbacks() {}
//===----------------------------------------------------------------------===//
// Miscellaneous Methods.
//===----------------------------------------------------------------------===//
/// isInPrimaryFile - Return true if we're in the top-level file, not in a
/// \#include. This looks through macro expansions and active _Pragma lexers.
bool Preprocessor::isInPrimaryFile() const {
if (IsFileLexer())
return IncludeMacroStack.empty();
// If there are any stacked lexers, we're in a #include.
assert(IsFileLexer(IncludeMacroStack[0]) &&
"Top level include stack isn't our primary lexer?");
return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
[this](const IncludeStackInfo &ISI) -> bool {
return IsFileLexer(ISI);
});
}
/// getCurrentFileLexer - Return the current file lexer being lexed from. Note
/// that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
if (IsFileLexer())
return CurPPLexer;
// Look for a stacked lexer.
for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
if (IsFileLexer(ISI))
return ISI.ThePPLexer;
}
return nullptr;
}
//===----------------------------------------------------------------------===//
// Methods for Entering and Callbacks for leaving various contexts
//===----------------------------------------------------------------------===//
/// EnterSourceFile - Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
SourceLocation Loc) {
assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
++NumEnteredSourceFiles;
if (MaxIncludeStackDepth < IncludeMacroStack.size())
MaxIncludeStackDepth = IncludeMacroStack.size();
if (PTH) {
if (PTHLexer *PL = PTH->CreateLexer(FID)) {
EnterSourceFileWithPTH(PL, CurDir);
return false;
}
}
// Get the MemoryBuffer for this FID, if it fails, we fail.
bool Invalid = false;
const llvm::MemoryBuffer *InputFile =
getSourceManager().getBuffer(FID, Loc, &Invalid);
if (Invalid) {
SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
Diag(Loc, diag::err_pp_error_opening_file)
<< std::string(SourceMgr.getBufferName(FileStart)) << "";
return true;
}
if (isCodeCompletionEnabled() &&
SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
CodeCompletionLoc =
CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
}
EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
return false;
}
/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
/// and start lexing tokens from it instead of the current buffer.
void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
const DirectoryLookup *CurDir) {
// Add the current lexer to the include stack.
if (CurPPLexer || CurTokenLexer)
PushIncludeMacroStack();
CurLexer.reset(TheLexer);
CurPPLexer = TheLexer;
CurDirLookup = CurDir;
CurLexerSubmodule = nullptr;
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_Lexer;
// Notify the client, if desired, that we are in a new source file.
if (Callbacks && !CurLexer->Is_PragmaLexer) {
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
Callbacks->FileChanged(CurLexer->getFileLoc(),
PPCallbacks::EnterFile, FileType);
}
}
/// EnterSourceFileWithPTH - Add a source file to the top of the include stack
/// and start getting tokens from it using the PTH cache.
void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
const DirectoryLookup *CurDir) {
if (CurPPLexer || CurTokenLexer)
PushIncludeMacroStack();
CurDirLookup = CurDir;
CurPTHLexer.reset(PL);
CurPPLexer = CurPTHLexer.get();
CurLexerSubmodule = nullptr;
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_PTHLexer;
// Notify the client, if desired, that we are in a new source file.
if (Callbacks) {
FileID FID = CurPPLexer->getFileID();
SourceLocation EnterLoc = SourceMgr.getLocForStartOfFile(FID);
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(EnterLoc);
Callbacks->FileChanged(EnterLoc, PPCallbacks::EnterFile, FileType);
}
}
/// EnterMacro - Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
MacroInfo *Macro, MacroArgs *Args) {
std::unique_ptr<TokenLexer> TokLexer;
if (NumCachedTokenLexers == 0) {
TokLexer = llvm::make_unique<TokenLexer>(Tok, ILEnd, Macro, Args, *this);
} else {
TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
TokLexer->Init(Tok, ILEnd, Macro, Args);
}
PushIncludeMacroStack();
CurDirLookup = nullptr;
CurTokenLexer = std::move(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
/// EnterTokenStream - Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If DisableMacroExpansion is true, tokens lexed from the token stream will
/// not be subject to further macro expansion. Otherwise, these tokens will
/// be re-macro-expanded when/if expansion is enabled.
///
/// If OwnsTokens is false, this method assumes that the specified stream of
/// tokens has a permanent owner somewhere, so they do not need to be copied.
/// If it is true, it assumes the array of tokens is allocated with new[] and
/// must be freed.
///
void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
bool DisableMacroExpansion,
bool OwnsTokens) {
if (CurLexerKind == CLK_CachingLexer) {
if (CachedLexPos < CachedTokens.size()) {
// We're entering tokens into the middle of our cached token stream. We
// can't represent that, so just insert the tokens into the buffer.
CachedTokens.insert(CachedTokens.begin() + CachedLexPos,
Toks, Toks + NumToks);
if (OwnsTokens)
delete [] Toks;
return;
}
// New tokens are at the end of the cached token sequence; insert the
// token stream underneath the caching lexer.
ExitCachingLexMode();
EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
EnterCachingLexMode();
return;
}
// Create a macro expander to expand from the specified token stream.
std::unique_ptr<TokenLexer> TokLexer;
if (NumCachedTokenLexers == 0) {
TokLexer = llvm::make_unique<TokenLexer>(
Toks, NumToks, DisableMacroExpansion, OwnsTokens, *this);
} else {
TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
}
// Save our current state.
PushIncludeMacroStack();
CurDirLookup = nullptr;
CurTokenLexer = std::move(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
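// Usage sketch (an assumption, not from this file): a caller handing the
// preprocessor a heap-allocated token array. OwnsTokens=true lets the
// TokenLexer delete[] the array once the stream is exhausted, while
// DisableMacroExpansion=true keeps the replayed tokens from being
// macro-expanded a second time:
//   Token *Toks = new Token[2];
//   Toks[0] = TokA; Toks[1] = TokB;   // TokA/TokB: hypothetical tokens
//   PP.EnterTokenStream(Toks, 2, /*DisableMacroExpansion=*/true,
//                       /*OwnsTokens=*/true);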
/// \brief Compute the relative path that names the given file relative to
/// the given directory.
static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
const FileEntry *File,
SmallString<128> &Result) {
Result.clear();
StringRef FilePath = File->getDir()->getName();
StringRef Path = FilePath;
while (!Path.empty()) {
if (const DirectoryEntry *CurDir = FM.getDirectory(Path)) {
if (CurDir == Dir) {
Result = FilePath.substr(Path.size());
llvm::sys::path::append(Result,
llvm::sys::path::filename(File->getName()));
return;
}
}
Path = llvm::sys::path::parent_path(Path);
}
Result = File->getName();
}
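// Worked example (sketch): with Dir = "/usr/include" and File =
// "/usr/include/sys/types.h", the loop starts from "/usr/include/sys",
// walks parent_path() until it equals Dir, keeps the suffix "/sys", and
// appends the filename, yielding "/sys/types.h". If no ancestor of the
// file's directory matches Dir, Result falls back to the file's full name.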
void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
if (CurTokenLexer) {
CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
return;
}
if (CurLexer) {
CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
return;
}
// FIXME: Handle other kinds of lexers? It generally shouldn't matter,
// but it might if they're empty?
}
/// \brief Determine the location to use as the end of the buffer for a lexer.
///
/// If the file ends with a newline, form the EOF token on the newline itself,
/// rather than "on the line following it", which doesn't exist. This makes
/// diagnostics relating to the end of file include the last file that the user
/// actually typed, which is goodness.
const char *Preprocessor::getCurLexerEndPos() {
const char *EndPos = CurLexer->BufferEnd;
if (EndPos != CurLexer->BufferStart &&
(EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
--EndPos;
// Handle \n\r and \r\n:
if (EndPos != CurLexer->BufferStart &&
(EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
EndPos[-1] != EndPos[0])
--EndPos;
}
return EndPos;
}
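// Example: for a buffer ending in ";\r\n", the first step backs up over the
// '\n' and the second over the '\r' (the two characters differ, so they form
// a single line terminator), placing the EOF token on the last line the user
// actually typed.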
static void collectAllSubModulesWithUmbrellaHeader(
const Module &Mod, SmallVectorImpl<const Module *> &SubMods) {
if (Mod.getUmbrellaHeader())
SubMods.push_back(&Mod);
for (auto *M : Mod.submodules())
collectAllSubModulesWithUmbrellaHeader(*M, SubMods);
}
void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
assert(Mod.getUmbrellaHeader() && "Module must use umbrella header");
SourceLocation StartLoc =
SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
if (getDiagnostics().isIgnored(diag::warn_uncovered_module_header, StartLoc))
return;
ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry;
vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
std::error_code EC;
for (vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC), End;
Entry != End && !EC; Entry.increment(EC)) {
using llvm::StringSwitch;
// Check whether this entry has an extension typically associated with
// headers.
if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->getName()))
.Cases(".h", ".H", ".hh", ".hpp", true)
.Default(false))
continue;
if (const FileEntry *Header = getFileManager().getFile(Entry->getName()))
if (!getSourceManager().hasFileInfo(Header)) {
if (!ModMap.isHeaderInUnavailableModule(Header)) {
// Find the relative path that would access this header.
SmallString<128> RelativePath;
computeRelativePath(FileMgr, Dir, Header, RelativePath);
Diag(StartLoc, diag::warn_uncovered_module_header)
<< Mod.getFullModuleName() << RelativePath;
}
}
}
}
/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
/// the current file. This either returns the EOF token or pops a level off
/// the include stack and keeps going.
bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
assert(!CurTokenLexer &&
"Ending a file when currently in a macro!");
// If we have an unclosed module region from a pragma at the end of a
// module, complain and close it now.
// FIXME: This is not correct if we are building a module from PTH.
const bool LeavingSubmodule = CurLexer && CurLexerSubmodule;
if ((LeavingSubmodule || IncludeMacroStack.empty()) &&
!BuildingSubmoduleStack.empty() &&
BuildingSubmoduleStack.back().IsPragma) {
Diag(BuildingSubmoduleStack.back().ImportLoc,
diag::err_pp_module_begin_without_module_end);
Module *M = LeaveSubmodule(/*ForPragma*/true);
Result.startToken();
const char *EndPos = getCurLexerEndPos();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
Result.setAnnotationEndLoc(Result.getLocation());
Result.setAnnotationValue(M);
return true;
}
// See if this file had a controlling macro.
if (CurPPLexer) { // Not ending a macro, ignore it.
if (const IdentifierInfo *ControllingMacro =
CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
// Okay, this has a controlling macro, remember in HeaderFileInfo.
if (const FileEntry *FE = CurPPLexer->getFileEntry()) {
HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
if (MacroInfo *MI =
getMacroInfo(const_cast<IdentifierInfo*>(ControllingMacro)))
MI->setUsedForHeaderGuard(true);
if (const IdentifierInfo *DefinedMacro =
CurPPLexer->MIOpt.GetDefinedMacro()) {
if (!isMacroDefined(ControllingMacro) &&
DefinedMacro != ControllingMacro &&
HeaderInfo.FirstTimeLexingFile(FE)) {
// If the edit distance between the two macros is more than 50%,
// DefinedMacro may not be a header guard, or it may be the header guard
// of another header file; it may therefore define something completely
// different. This can be observed in the wild when handling feature
// macros or header guards in different files.
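// Worked example (sketch): "FOO_H" vs. "FOO_h" gives MaxHalfLength = 2
// and edit distance 1, so the header-guard warning below fires, while
// "FOO_H" vs. "BAR_CONFIG" exceeds the 50% bound and is treated as an
// unrelated macro.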
const StringRef ControllingMacroName = ControllingMacro->getName();
const StringRef DefinedMacroName = DefinedMacro->getName();
const size_t MaxHalfLength = std::max(ControllingMacroName.size(),
DefinedMacroName.size()) / 2;
const unsigned ED = ControllingMacroName.edit_distance(
DefinedMacroName, true, MaxHalfLength);
if (ED <= MaxHalfLength) {
// Emit a warning for a bad header guard.
Diag(CurPPLexer->MIOpt.GetMacroLocation(),
diag::warn_header_guard)
<< CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro;
Diag(CurPPLexer->MIOpt.GetDefinedLocation(),
diag::note_header_guard)
<< CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro
<< ControllingMacro
<< FixItHint::CreateReplacement(
CurPPLexer->MIOpt.GetDefinedLocation(),
ControllingMacro->getName());
}
}
}
}
}
}
// Complain about reaching a true EOF within arc_cf_code_audited.
// We don't want to complain about reaching the end of a macro
// instantiation or a _Pragma.
if (PragmaARCCFCodeAuditedLoc.isValid() &&
!isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited);
// Recover by leaving immediately.
PragmaARCCFCodeAuditedLoc = SourceLocation();
}
// Complain about reaching a true EOF within assume_nonnull.
// We don't want to complain about reaching the end of a macro
// instantiation or a _Pragma.
if (PragmaAssumeNonNullLoc.isValid() &&
!isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);
// Recover by leaving immediately.
PragmaAssumeNonNullLoc = SourceLocation();
}
// If this is a #include'd file, pop it off the include stack and continue
// lexing the #includer file.
if (!IncludeMacroStack.empty()) {
// If we lexed the code-completion file, act as if we reached EOF.
if (isCodeCompletionEnabled() && CurPPLexer &&
SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
CodeCompletionFileLoc) {
if (CurLexer) {
Result.startToken();
CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
CurLexer.reset();
} else {
assert(CurPTHLexer && "Got EOF but no current lexer set!");
CurPTHLexer->getEOF(Result);
CurPTHLexer.reset();
}
CurPPLexer = nullptr;
return true;
}
if (!isEndOfMacro && CurPPLexer &&
SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
// Notify SourceManager to record the number of FileIDs that were created
// during lexing of the #include'd file.
unsigned NumFIDs =
SourceMgr.local_sloc_entry_size() -
CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
}
+ bool ExitedFromPredefinesFile = false;
FileID ExitedFID;
- if (Callbacks && !isEndOfMacro && CurPPLexer)
+ if (!isEndOfMacro && CurPPLexer) {
ExitedFID = CurPPLexer->getFileID();
+ assert(PredefinesFileID.isValid() &&
+ "HandleEndOfFile is called before PredefinesFileId is set");
+ ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
+ }
+
if (LeavingSubmodule) {
// We're done with this submodule.
Module *M = LeaveSubmodule(/*ForPragma*/false);
// Notify the parser that we've left the module.
const char *EndPos = getCurLexerEndPos();
Result.startToken();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
Result.setAnnotationEndLoc(Result.getLocation());
Result.setAnnotationValue(M);
}
// We're done with the #included file.
RemoveTopOfLexerStack();
// Propagate info about start-of-line/leading white-space/etc.
PropagateLineStartLeadingSpaceInfo(Result);
// Notify the client, if desired, that we are in a new source file.
if (Callbacks && !isEndOfMacro && CurPPLexer) {
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
PPCallbacks::ExitFile, FileType, ExitedFID);
}
+
+ // Restore conditional stack from the preamble right after exiting from the
+ // predefines file.
+ if (ExitedFromPredefinesFile)
+ replayPreambleConditionalStack();
// Client should lex another token unless we generated an EOM.
return LeavingSubmodule;
}
// If this is the end of the main file, form an EOF token.
if (CurLexer) {
const char *EndPos = getCurLexerEndPos();
Result.startToken();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
if (isCodeCompletionEnabled()) {
// Inserting the code-completion point increases the source buffer by 1,
// but the main FileID was created before inserting the point.
// Compensate by reducing the EOF location by 1, otherwise the location
// will point to the next FileID.
// FIXME: This is hacky, the code-completion point should probably be
// inserted before the main FileID is created.
if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
Result.setLocation(Result.getLocation().getLocWithOffset(-1));
}
if (!isIncrementalProcessingEnabled())
// We're done with lexing.
CurLexer.reset();
} else {
assert(CurPTHLexer && "Got EOF but no current lexer set!");
CurPTHLexer->getEOF(Result);
CurPTHLexer.reset();
}
if (!isIncrementalProcessingEnabled())
CurPPLexer = nullptr;
if (TUKind == TU_Complete) {
// This is the end of the top-level file. 'WarnUnusedMacroLocs' has
// collected all macro locations that we need to warn because they are not
// used.
for (WarnUnusedMacroLocsTy::iterator
I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end();
I!=E; ++I)
Diag(*I, diag::pp_macro_not_used);
}
// If we are building a module that has an umbrella header, make sure that
// each of the headers within the umbrella directory, including those of
// all submodules, was actually included by the umbrella header.
if (Module *Mod = getCurrentModule()) {
llvm::SmallVector<const Module *, 4> AllMods;
collectAllSubModulesWithUmbrellaHeader(*Mod, AllMods);
for (auto *M : AllMods)
diagnoseMissingHeaderInUmbrellaDir(*M);
}
return true;
}
/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
/// hits the end of its token stream.
bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
assert(CurTokenLexer && !CurPPLexer &&
"Ending a macro when currently in a #include file!");
if (!MacroExpandingLexersStack.empty() &&
MacroExpandingLexersStack.back().first == CurTokenLexer.get())
removeCachedMacroExpandedTokensOfLastLexer();
// Delete or cache the now-dead macro expander.
if (NumCachedTokenLexers == TokenLexerCacheSize)
CurTokenLexer.reset();
else
TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
// Handle this like a #include file being popped off the stack.
return HandleEndOfFile(Result, true);
}
/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
/// lexer stack. This should only be used in situations where the current
/// state of the top-of-stack lexer is unknown.
void Preprocessor::RemoveTopOfLexerStack() {
assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
if (CurTokenLexer) {
// Delete or cache the now-dead macro expander.
if (NumCachedTokenLexers == TokenLexerCacheSize)
CurTokenLexer.reset();
else
TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
}
PopIncludeMacroStack();
}
/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
/// comment (/##/) in Microsoft mode, this method handles updating the current
/// state, returning the token on the next source line.
void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
assert(CurTokenLexer && !CurPPLexer &&
"Pasted comment can only be formed from macro");
// We handle this by scanning for the closest real lexer, switching it to
// raw mode and preprocessor mode. This will cause it to return \n as an
// explicit EOD token.
PreprocessorLexer *FoundLexer = nullptr;
bool LexerWasInPPMode = false;
for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
if (ISI.ThePPLexer == nullptr) continue; // Scan for a real lexer.
// Once we find a real lexer, mark it as raw mode (disabling macro
// expansions) and preprocessor mode (return EOD). We know that the lexer
// was *not* in raw mode before, because the macro that the comment came
// from was expanded. However, it could have already been in preprocessor
// mode (#if COMMENT) in which case we have to return it to that mode and
// return EOD.
FoundLexer = ISI.ThePPLexer;
FoundLexer->LexingRawMode = true;
LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
FoundLexer->ParsingPreprocessorDirective = true;
break;
}
// Okay, we either found and switched over the lexer, or we didn't find a
// lexer. In either case, finish off the macro the comment came from, getting
// the next token.
if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
// Discarding comments as long as we don't have EOF or EOD. This 'comments
// out' the rest of the line, including any tokens that came from other macros
// that were active, as in:
// #define submacro a COMMENT b
// submacro c
// which should lex to 'a' only: 'b' and 'c' should be removed.
while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
Lex(Tok);
// If we got an eod token, then we successfully found the end of the line.
if (Tok.is(tok::eod)) {
assert(FoundLexer && "Can't get end of line without an active lexer");
// Restore the lexer back to normal mode instead of raw mode.
FoundLexer->LexingRawMode = false;
// If the lexer was already in preprocessor mode, just return the EOD token
// to finish the preprocessor line.
if (LexerWasInPPMode) return;
// Otherwise, switch out of PP mode and return the next lexed token.
FoundLexer->ParsingPreprocessorDirective = false;
return Lex(Tok);
}
// If we got an EOF token, then we reached the end of the token stream but
// didn't find an explicit \n. This can only happen if there was no lexer
// active (an active lexer would return EOD at EOF if there was no \n in
// preprocessor directive mode), so just return EOF as our token.
assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
}
void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc,
bool ForPragma) {
if (!getLangOpts().ModulesLocalVisibility) {
// Just track that we entered this submodule.
BuildingSubmoduleStack.push_back(
BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
PendingModuleMacroNames.size()));
return;
}
// Resolve as much of the module definition as we can now, before we enter
// one of its headers.
// FIXME: Can we enable Complain here?
// FIXME: Can we do this when local visibility is disabled?
ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
ModMap.resolveExports(M, /*Complain=*/false);
ModMap.resolveUses(M, /*Complain=*/false);
ModMap.resolveConflicts(M, /*Complain=*/false);
// If this is the first time we've entered this module, set up its state.
auto R = Submodules.insert(std::make_pair(M, SubmoduleState()));
auto &State = R.first->second;
bool FirstTime = R.second;
if (FirstTime) {
// Determine the set of starting macros for this submodule; take these
// from the "null" module (the predefines buffer).
//
// FIXME: If we have local visibility but not modules enabled, the
// NullSubmoduleState is polluted by #defines in the top-level source
// file.
auto &StartingMacros = NullSubmoduleState.Macros;
// Restore to the starting state.
// FIXME: Do this lazily, when each macro name is first referenced.
for (auto &Macro : StartingMacros) {
// Skip uninteresting macros.
if (!Macro.second.getLatest() &&
Macro.second.getOverriddenMacros().empty())
continue;
MacroState MS(Macro.second.getLatest());
MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
}
}
// Track that we entered this module.
BuildingSubmoduleStack.push_back(
BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
PendingModuleMacroNames.size()));
// Switch to this submodule as the current submodule.
CurSubmoduleState = &State;
// This module is visible to itself.
if (FirstTime)
makeModuleVisible(M, ImportLoc);
}
bool Preprocessor::needModuleMacros() const {
// If we're not within a submodule, we never need to create ModuleMacros.
if (BuildingSubmoduleStack.empty())
return false;
// If we are tracking module macro visibility even for textually-included
// headers, we need ModuleMacros.
if (getLangOpts().ModulesLocalVisibility)
return true;
// Otherwise, we only need module macros if we're actually compiling a module
// interface.
return getLangOpts().isCompilingModule();
}
Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
if (BuildingSubmoduleStack.empty() ||
BuildingSubmoduleStack.back().IsPragma != ForPragma) {
assert(ForPragma && "non-pragma module enter/leave mismatch");
return nullptr;
}
auto &Info = BuildingSubmoduleStack.back();
Module *LeavingMod = Info.M;
SourceLocation ImportLoc = Info.ImportLoc;
if (!needModuleMacros() ||
(!getLangOpts().ModulesLocalVisibility &&
LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
// If we don't need module macros, or this is not a module for which we
// are tracking macro visibility, don't build any, and preserve the list
// of pending names for the surrounding submodule.
BuildingSubmoduleStack.pop_back();
makeModuleVisible(LeavingMod, ImportLoc);
return LeavingMod;
}
// Create ModuleMacros for any macros defined in this submodule.
llvm::SmallPtrSet<const IdentifierInfo*, 8> VisitedMacros;
for (unsigned I = Info.OuterPendingModuleMacroNames;
I != PendingModuleMacroNames.size(); ++I) {
auto *II = const_cast<IdentifierInfo*>(PendingModuleMacroNames[I]);
if (!VisitedMacros.insert(II).second)
continue;
auto MacroIt = CurSubmoduleState->Macros.find(II);
if (MacroIt == CurSubmoduleState->Macros.end())
continue;
auto &Macro = MacroIt->second;
// Find the starting point for the MacroDirective chain in this submodule.
MacroDirective *OldMD = nullptr;
auto *OldState = Info.OuterSubmoduleState;
if (getLangOpts().ModulesLocalVisibility)
OldState = &NullSubmoduleState;
if (OldState && OldState != CurSubmoduleState) {
// FIXME: It'd be better to start at the state from when we most recently
// entered this submodule, but it doesn't really matter.
auto &OldMacros = OldState->Macros;
auto OldMacroIt = OldMacros.find(II);
if (OldMacroIt == OldMacros.end())
OldMD = nullptr;
else
OldMD = OldMacroIt->second.getLatest();
}
// This module may have exported a new macro. If so, create a ModuleMacro
// representing that fact.
bool ExplicitlyPublic = false;
for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
assert(MD && "broken macro directive chain");
if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
// The latest visibility directive for a name in a submodule affects
// all the directives that come before it.
if (VisMD->isPublic())
ExplicitlyPublic = true;
else if (!ExplicitlyPublic)
// Private with no following public directive: not exported.
break;
} else {
MacroInfo *Def = nullptr;
if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
Def = DefMD->getInfo();
// FIXME: Issue a warning if multiple headers for the same submodule
// define a macro, rather than silently ignoring all but the first.
bool IsNew;
// Don't bother creating a module macro if it would represent a #undef
// that doesn't override anything.
if (Def || !Macro.getOverriddenMacros().empty())
addModuleMacro(LeavingMod, II, Def,
Macro.getOverriddenMacros(), IsNew);
if (!getLangOpts().ModulesLocalVisibility) {
// This macro is exposed to the rest of this compilation as a
// ModuleMacro; we don't need to track its MacroDirective any more.
Macro.setLatest(nullptr);
Macro.setOverriddenMacros(*this, {});
}
break;
}
}
}
PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);
// FIXME: Before we leave this submodule, we should parse all the other
// headers within it. Otherwise, we're left with an inconsistent state
// where we've made the module visible but don't yet have its complete
// contents.
// Put back the outer module's state, if we're tracking it.
if (getLangOpts().ModulesLocalVisibility)
CurSubmoduleState = Info.OuterSubmoduleState;
BuildingSubmoduleStack.pop_back();
// A nested #include makes the included submodule visible.
makeModuleVisible(LeavingMod, ImportLoc);
return LeavingMod;
}
Index: head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp (revision 322855)
@@ -1,957 +1,959 @@
//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
// -H - Print the name of each header file used.
// -d[DNI] - Dump various things.
// -fworking-directory - #line's with preprocessor's working dir.
// -fpreprocessed
// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
// -W*
// -w
//
// Messages to emit:
// "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/PTHManager.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace clang;
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts,
SourceManager &SM, MemoryBufferCache &PCMCache,
HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup, bool OwnsHeaders,
TranslationUnitKind TUKind)
: PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
ExternalSource(nullptr), Identifiers(opts, IILookup),
PragmaHandlers(new PragmaNamespace(StringRef())),
IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
CodeCompletionFile(nullptr), CodeCompletionOffset(0),
LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
CodeCompletionReached(false), CodeCompletionII(nullptr),
MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
CurLexerSubmodule(nullptr), Callbacks(nullptr),
CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
Record(nullptr), MIChainHead(nullptr) {
OwnsHeaderSearch = OwnsHeaders;
CounterValue = 0; // __COUNTER__ starts at 0.
// Clear stats.
NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
NumIf = NumElse = NumEndif = 0;
NumEnteredSourceFiles = 0;
NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
MaxIncludeStackDepth = 0;
NumSkipped = 0;
// Default to discarding comments.
KeepComments = false;
KeepMacroComments = false;
SuppressIncludeNotFoundError = false;
// Macro expansion is enabled.
DisableMacroExpansion = false;
MacroExpansionInDirectivesOverride = false;
InMacroArgs = false;
InMacroArgPreExpansion = false;
NumCachedTokenLexers = 0;
PragmasEnabled = true;
ParsingIfOrElifDirective = false;
PreprocessedOutput = false;
CachedLexPos = 0;
// We haven't read anything from the external source.
ReadMacrosFromExternalSource = false;
// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
// This gets unpoisoned where it is allowed.
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);
// Initialize the pragma handlers.
RegisterBuiltinPragmas();
// Initialize builtin macros like __LINE__ and friends.
RegisterBuiltinMacros();
if (LangOpts.Borland) {
Ident__exception_info = getIdentifierInfo("_exception_info");
Ident___exception_info = getIdentifierInfo("__exception_info");
Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
Ident__exception_code = getIdentifierInfo("_exception_code");
Ident___exception_code = getIdentifierInfo("__exception_code");
Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
} else {
Ident__exception_info = Ident__exception_code = nullptr;
Ident__abnormal_termination = Ident___exception_info = nullptr;
Ident___exception_code = Ident___abnormal_termination = nullptr;
Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
Ident_AbnormalTermination = nullptr;
}
if (this->PPOpts->GeneratePreamble)
PreambleConditionalStack.startRecording();
}
Preprocessor::~Preprocessor() {
assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
IncludeMacroStack.clear();
// Destroy any macro definitions.
while (MacroInfoChain *I = MIChainHead) {
MIChainHead = I->Next;
I->~MacroInfoChain();
}
// Free any cached macro expanders.
// This populates MacroArgCache, so all TokenLexers need to be destroyed
// before the code below that frees up the MacroArgCache list.
std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
CurTokenLexer.reset();
// Free any cached MacroArgs.
for (MacroArgs *ArgList = MacroArgCache; ArgList;)
ArgList = ArgList->deallocate();
// Delete the header search info, if we own it.
if (OwnsHeaderSearch)
delete &HeaderInfo;
}
void Preprocessor::Initialize(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
assert((!this->Target || this->Target == &Target) &&
"Invalid override of target information");
this->Target = &Target;
assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
"Invalid override of aux target information.");
this->AuxTarget = AuxTarget;
// Initialize information about built-ins.
BuiltinInfo.InitializeTarget(Target, AuxTarget);
HeaderInfo.setTarget(Target);
}
void Preprocessor::InitializeForModelFile() {
NumEnteredSourceFiles = 0;
// Reset pragmas
PragmaHandlersBackup = std::move(PragmaHandlers);
PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
RegisterBuiltinPragmas();
// Reset PredefinesFileID
PredefinesFileID = FileID();
}
void Preprocessor::FinalizeForModelFile() {
NumEnteredSourceFiles = 1;
PragmaHandlers = std::move(PragmaHandlersBackup);
}
void Preprocessor::setPTHManager(PTHManager* pm) {
PTH.reset(pm);
FileMgr.addStatCache(PTH->createStatCache());
}
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
<< getSpelling(Tok) << "'";
if (!DumpFlags) return;
llvm::errs() << "\t";
if (Tok.isAtStartOfLine())
llvm::errs() << " [StartOfLine]";
if (Tok.hasLeadingSpace())
llvm::errs() << " [LeadingSpace]";
if (Tok.isExpandDisabled())
llvm::errs() << " [ExpandDisabled]";
if (Tok.needsCleaning()) {
const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
<< "']";
}
llvm::errs() << "\tLoc=<";
DumpLocation(Tok.getLocation());
llvm::errs() << ">";
}
void Preprocessor::DumpLocation(SourceLocation Loc) const {
Loc.dump(SourceMgr);
}
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
llvm::errs() << "MACRO: ";
for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
DumpToken(MI.getReplacementToken(i));
llvm::errs() << " ";
}
llvm::errs() << "\n";
}
void Preprocessor::PrintStats() {
llvm::errs() << "\n*** Preprocessor Stats:\n";
llvm::errs() << NumDirectives << " directives found:\n";
llvm::errs() << " " << NumDefined << " #define.\n";
llvm::errs() << " " << NumUndefined << " #undef.\n";
llvm::errs() << " #include/#include_next/#import:\n";
llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
llvm::errs() << " " << NumElse << " #else/#elif.\n";
llvm::errs() << " " << NumEndif << " #endif.\n";
llvm::errs() << " " << NumPragma << " #pragma.\n";
llvm::errs() << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
<< NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
<< NumFastMacroExpanded << " on the fast path.\n";
llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
<< " token paste (##) operations performed, "
<< NumFastTokenPaste << " on the fast path.\n";
llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
llvm::errs() << "\n Macro Expanded Tokens: "
<< llvm::capacity_in_bytes(MacroExpandedTokens);
llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
// FIXME: List information for all submodules.
llvm::errs() << "\n Macros: "
<< llvm::capacity_in_bytes(CurSubmoduleState->Macros);
llvm::errs() << "\n #pragma push_macro Info: "
<< llvm::capacity_in_bytes(PragmaPushMacroInfo);
llvm::errs() << "\n Poison Reasons: "
<< llvm::capacity_in_bytes(PoisonReasons);
llvm::errs() << "\n Comment Handlers: "
<< llvm::capacity_in_bytes(CommentHandlers) << "\n";
}
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
if (IncludeExternalMacros && ExternalSource &&
!ReadMacrosFromExternalSource) {
ReadMacrosFromExternalSource = true;
ExternalSource->ReadDefinedMacros();
}
// Make sure we cover all macros in visible modules.
for (const ModuleMacro &Macro : ModuleMacros)
CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
return CurSubmoduleState->Macros.begin();
}
size_t Preprocessor::getTotalMemory() const {
return BP.getTotalMemory()
+ llvm::capacity_in_bytes(MacroExpandedTokens)
+ Predefines.capacity() /* Predefines buffer. */
// FIXME: Include sizes from all submodules, and include MacroInfo sizes,
// and ModuleMacros.
+ llvm::capacity_in_bytes(CurSubmoduleState->Macros)
+ llvm::capacity_in_bytes(PragmaPushMacroInfo)
+ llvm::capacity_in_bytes(PoisonReasons)
+ llvm::capacity_in_bytes(CommentHandlers);
}
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
if (IncludeExternalMacros && ExternalSource &&
!ReadMacrosFromExternalSource) {
ReadMacrosFromExternalSource = true;
ExternalSource->ReadDefinedMacros();
}
return CurSubmoduleState->Macros.end();
}
/// \brief Compares macro tokens with a specified token value sequence.
static bool MacroDefinitionEquals(const MacroInfo *MI,
ArrayRef<TokenValue> Tokens) {
return Tokens.size() == MI->getNumTokens() &&
std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
}
StringRef Preprocessor::getLastMacroWithSpelling(
SourceLocation Loc,
ArrayRef<TokenValue> Tokens) const {
SourceLocation BestLocation;
StringRef BestSpelling;
for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
I != E; ++I) {
const MacroDirective::DefInfo
Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
if (!Def || !Def.getMacroInfo())
continue;
if (!Def.getMacroInfo()->isObjectLike())
continue;
if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
continue;
SourceLocation Location = Def.getLocation();
// Choose the macro defined latest.
if (BestLocation.isInvalid() ||
(Location.isValid() &&
SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
BestLocation = Location;
BestSpelling = I->first->getName();
}
}
return BestSpelling;
}
void Preprocessor::recomputeCurLexerKind() {
if (CurLexer)
CurLexerKind = CLK_Lexer;
else if (CurPTHLexer)
CurLexerKind = CLK_PTHLexer;
else if (CurTokenLexer)
CurLexerKind = CLK_TokenLexer;
else
CurLexerKind = CLK_CachingLexer;
}
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
unsigned CompleteLine,
unsigned CompleteColumn) {
assert(File);
assert(CompleteLine && CompleteColumn && "Starts from 1:1");
assert(!CodeCompletionFile && "Already set");
using llvm::MemoryBuffer;
// Load the actual file's contents.
bool Invalid = false;
const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
if (Invalid)
return true;
// Find the byte position of the truncation point.
const char *Position = Buffer->getBufferStart();
for (unsigned Line = 1; Line < CompleteLine; ++Line) {
for (; *Position; ++Position) {
if (*Position != '\r' && *Position != '\n')
continue;
// Eat \r\n or \n\r as a single line.
if ((Position[1] == '\r' || Position[1] == '\n') &&
Position[0] != Position[1])
++Position;
++Position;
break;
}
}
Position += CompleteColumn - 1;
// If pointing inside the preamble, adjust the position at the beginning of
// the file after the preamble.
if (SkipMainFilePreamble.first &&
SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
}
if (Position > Buffer->getBufferEnd())
Position = Buffer->getBufferEnd();
CodeCompletionFile = File;
CodeCompletionOffset = Position - Buffer->getBufferStart();
std::unique_ptr<MemoryBuffer> NewBuffer =
MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
Buffer->getBufferIdentifier());
char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
*NewPos = '\0';
std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
SourceMgr.overrideFileContents(File, std::move(NewBuffer));
return false;
}
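// Sketch of the resulting buffer: the override installed above is the
// original contents with a single '\0' spliced in at CodeCompletionOffset;
// the lexer later recognizes that NUL at the recorded offset as the
// code-completion point.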
void Preprocessor::CodeCompleteNaturalLanguage() {
if (CodeComplete)
CodeComplete->CodeCompleteNaturalLanguage();
setCodeCompletionReached();
}
/// getSpelling - This method is used to get the spelling of a token into a
/// SmallVector. Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
StringRef Preprocessor::getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
}
// Resize the buffer if we need to copy into it.
if (Tok.needsCleaning())
Buffer.resize(Tok.getLength());
const char *Ptr = Buffer.data();
unsigned Len = getSpelling(Tok, Ptr, Invalid);
return StringRef(Ptr, Len);
}
/// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it. If specified, the source location provides a source
/// location for the token.
void Preprocessor::CreateString(StringRef Str, Token &Tok,
SourceLocation ExpansionLocStart,
SourceLocation ExpansionLocEnd) {
Tok.setLength(Str.size());
const char *DestPtr;
SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
if (ExpansionLocStart.isValid())
Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
ExpansionLocEnd, Str.size());
Tok.setLocation(Loc);
// If this is a raw identifier or a literal token, set the pointer data.
if (Tok.is(tok::raw_identifier))
Tok.setRawIdentifierData(DestPtr);
else if (Tok.isLiteral())
Tok.setLiteralData(DestPtr);
}
Module *Preprocessor::getCurrentModule() {
if (!getLangOpts().isCompilingModule())
return nullptr;
return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
}
//===----------------------------------------------------------------------===//
// Preprocessor Initialization Methods
//===----------------------------------------------------------------------===//
/// EnterMainSourceFile - Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
void Preprocessor::EnterMainSourceFile() {
// We do not allow the preprocessor to reenter the main file. Doing so will
// cause FileIDs to accumulate information from both runs (e.g. #line
// information) and predefined macros aren't guaranteed to be set properly.
assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
FileID MainFileID = SourceMgr.getMainFileID();
// If MainFileID is loaded it means we loaded an AST file, no need to enter
// a main file.
if (!SourceMgr.isLoadedFileID(MainFileID)) {
// Enter the main file source buffer.
EnterSourceFile(MainFileID, nullptr, SourceLocation());
// If we've been asked to skip bytes in the main file (e.g., as part of a
// precompiled preamble), do so now.
if (SkipMainFilePreamble.first > 0)
CurLexer->SkipBytes(SkipMainFilePreamble.first,
SkipMainFilePreamble.second);
// Tell the header info that the main file was entered. If the file is later
// #imported, it won't be re-entered.
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
HeaderInfo.IncrementIncludeCount(FE);
}
// Preprocess Predefines to populate the initial preprocessor state.
std::unique_ptr<llvm::MemoryBuffer> SB =
llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
assert(SB && "Cannot create predefined source buffer");
FileID FID = SourceMgr.createFileID(std::move(SB));
assert(FID.isValid() && "Could not create FileID for predefines?");
setPredefinesFileID(FID);
// Start parsing the predefines.
EnterSourceFile(FID, nullptr, SourceLocation());
}
void Preprocessor::replayPreambleConditionalStack() {
// Restore the conditional stack from the preamble, if there is one.
if (PreambleConditionalStack.isReplaying()) {
+ assert(CurPPLexer &&
+ "CurPPLexer is null when calling replayPreambleConditionalStack.");
CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
PreambleConditionalStack.doneReplaying();
}
}
void Preprocessor::EndSourceFile() {
// Notify the client that we reached the end of the source file.
if (Callbacks)
Callbacks->EndOfMainFile();
}
//===----------------------------------------------------------------------===//
// Lexer Event Handling.
//===----------------------------------------------------------------------===//
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
// Look up this token, see if it is a macro, or if it is a language keyword.
IdentifierInfo *II;
if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
// No cleaning needed, just use the characters from the lexed buffer.
II = getIdentifierInfo(Identifier.getRawIdentifier());
} else {
// Cleaning needed: allocate a buffer, clean into it, then use the buffer.
SmallString<64> IdentifierBuffer;
StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
if (Identifier.hasUCN()) {
SmallString<64> UCNIdentifierBuffer;
expandUCNs(UCNIdentifierBuffer, CleanedStr);
II = getIdentifierInfo(UCNIdentifierBuffer);
} else {
II = getIdentifierInfo(CleanedStr);
}
}
// Update the token info (identifier info and appropriate token kind).
Identifier.setIdentifierInfo(II);
if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
getSourceManager().isInSystemHeader(Identifier.getLocation()))
Identifier.setKind(clang::tok::identifier);
else
Identifier.setKind(II->getTokenID());
return II;
}
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
PoisonReasons[II] = DiagID;
}
void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
assert(Ident__exception_code && Ident__exception_info);
assert(Ident___exception_code && Ident___exception_info);
Ident__exception_code->setIsPoisoned(Poison);
Ident___exception_code->setIsPoisoned(Poison);
Ident_GetExceptionCode->setIsPoisoned(Poison);
Ident__exception_info->setIsPoisoned(Poison);
Ident___exception_info->setIsPoisoned(Poison);
Ident_GetExceptionInfo->setIsPoisoned(Poison);
Ident__abnormal_termination->setIsPoisoned(Poison);
Ident___abnormal_termination->setIsPoisoned(Poison);
Ident_AbnormalTermination->setIsPoisoned(Poison);
}
void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
assert(Identifier.getIdentifierInfo() &&
"Can't handle identifiers without identifier info!");
llvm::DenseMap<IdentifierInfo *, unsigned>::const_iterator it =
PoisonReasons.find(Identifier.getIdentifierInfo());
if (it == PoisonReasons.end())
Diag(Identifier, diag::err_pp_used_poisoned_id);
else
Diag(Identifier, it->second) << Identifier.getIdentifierInfo();
}
/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
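/// For example, in C++98 mode the identifier "constexpr" is a
/// future-compatibility keyword and maps to diag::warn_cxx11_keyword here.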
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
const LangOptions &LangOpts) {
assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
if (LangOpts.CPlusPlus)
return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS) \
.Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
;
llvm_unreachable(
"Keyword not known to come from a newer Standard or proposed Standard");
}
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
assert(II.isOutOfDate() && "not out of date");
getExternalSource()->updateOutOfDateIdentifier(II);
}
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier. This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
assert(Identifier.getIdentifierInfo() &&
"Can't handle identifiers without identifier info!");
IdentifierInfo &II = *Identifier.getIdentifierInfo();
// If the information about this identifier is out of date, update it from
// the external source.
// We have to treat __VA_ARGS__ in a special way, since it gets
// serialized with isPoisoned = true, but our preprocessor may have
// unpoisoned it if we're defining a C99 macro.
if (II.isOutOfDate()) {
bool CurrentIsPoisoned = false;
if (&II == Ident__VA_ARGS__)
CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
updateOutOfDateIdentifier(II);
Identifier.setKind(II.getTokenID());
if (&II == Ident__VA_ARGS__)
II.setIsPoisoned(CurrentIsPoisoned);
}
// If this identifier was poisoned, and if it was not produced from a macro
// expansion, emit an error.
if (II.isPoisoned() && CurPPLexer) {
HandlePoisonedIdentifier(Identifier);
}
// If this is a macro to be expanded, do it.
if (MacroDefinition MD = getMacroDefinition(&II)) {
auto *MI = MD.getMacroInfo();
assert(MI && "macro definition with no macro info?");
if (!DisableMacroExpansion) {
if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
// C99 6.10.3p10: If the preprocessing token immediately after the
// macro name isn't a '(', this macro should not be expanded.
if (!MI->isFunctionLike() || isNextPPTokenLParen())
return HandleMacroExpandedIdentifier(Identifier, MD);
} else {
// C99 6.10.3.4p2 says that a disabled macro may never again be
// expanded, even if it's in a context where it could be expanded in the
// future.
Identifier.setFlag(Token::DisableExpand);
if (MI->isObjectLike() || isNextPPTokenLParen())
Diag(Identifier, diag::pp_disabled_macro_expansion);
}
}
}
// If this identifier is a keyword in a newer Standard or proposed Standard,
// produce a warning. Don't warn if we're not considering macro expansion,
// since this identifier might be the name of a macro.
// FIXME: This warning is disabled in cases where it shouldn't be, like
// "#define constexpr constexpr", "int constexpr;"
if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
<< II.getName();
// Don't diagnose this keyword again in this translation unit.
II.setIsFutureCompatKeyword(false);
}
// If this is an extension token, diagnose its use.
// We avoid diagnosing tokens that originate from macro definitions.
// FIXME: This warning is disabled in cases where it shouldn't be,
// like "#define TY typeof", "TY(1) x".
if (II.isExtensionToken() && !DisableMacroExpansion)
Diag(Identifier, diag::ext_token_used);
// If this is the 'import' contextual keyword following an '@', note
// that the next token indicates a module name.
//
// Note that we do not treat 'import' as a contextual
// keyword when we're in a caching lexer, because caching lexers only get
// used in contexts where import declarations are disallowed.
//
// Likewise if this is the C++ Modules TS import keyword.
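//
// For example, the Objective-C directive "@import Foo.Bar;" reaches this
// point with LastTokenWasAt set and II spelled 'import'.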
if (((LastTokenWasAt && II.isModulesImport()) ||
Identifier.is(tok::kw_import)) &&
!InMacroArgs && !DisableMacroExpansion &&
(getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
CurLexerKind != CLK_CachingLexer) {
ModuleImportLoc = Identifier.getLocation();
ModuleImportPath.clear();
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
}
return true;
}
void Preprocessor::Lex(Token &Result) {
// We loop here until a lex function returns a token; this avoids recursion.
bool ReturnedToken;
do {
switch (CurLexerKind) {
case CLK_Lexer:
ReturnedToken = CurLexer->Lex(Result);
break;
case CLK_PTHLexer:
ReturnedToken = CurPTHLexer->Lex(Result);
break;
case CLK_TokenLexer:
ReturnedToken = CurTokenLexer->Lex(Result);
break;
case CLK_CachingLexer:
CachingLex(Result);
ReturnedToken = true;
break;
case CLK_LexAfterModuleImport:
LexAfterModuleImport(Result);
ReturnedToken = true;
break;
}
} while (!ReturnedToken);
if (Result.is(tok::code_completion))
setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
LastTokenWasAt = Result.is(tok::at);
}
/// \brief Lex a token following the 'import' contextual keyword.
///
void Preprocessor::LexAfterModuleImport(Token &Result) {
// Figure out what kind of lexer we actually have.
recomputeCurLexerKind();
// Lex the next token.
Lex(Result);
// The token sequence
//
// import identifier (. identifier)*
//
// indicates a module import directive. We already saw the 'import'
// contextual keyword, so now we're looking for the identifiers.
if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
// We expected to see an identifier here, and we did; continue handling
// identifiers.
ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
Result.getLocation()));
ModuleImportExpectsIdentifier = false;
CurLexerKind = CLK_LexAfterModuleImport;
return;
}
// If we're expecting a '.' or a ';', and we got a '.', then wait until we
// see the next identifier. (We can also see a '[[' that begins an
// attribute-specifier-seq here under the C++ Modules TS.)
if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
return;
}
// If we have a non-empty module path, load the named module.
if (!ModuleImportPath.empty()) {
// Under the Modules TS, the dot is just part of the module name, and not
// a real hierarchy separator. Flatten such module names now.
//
// FIXME: Is this the right level to be performing this transformation?
std::string FlatModuleName;
if (getLangOpts().ModulesTS) {
for (auto &Piece : ModuleImportPath) {
if (!FlatModuleName.empty())
FlatModuleName += ".";
FlatModuleName += Piece.first->getName();
}
SourceLocation FirstPathLoc = ModuleImportPath[0].second;
ModuleImportPath.clear();
ModuleImportPath.push_back(
std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
}
Module *Imported = nullptr;
if (getLangOpts().Modules) {
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
ModuleImportPath,
Module::Hidden,
/*IsIncludeDirective=*/false);
if (Imported)
makeModuleVisible(Imported, ModuleImportLoc);
}
if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
}
}
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
CurSubmoduleState->VisibleModules.setVisible(
M, Loc, [](Module *) {},
[&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
// FIXME: Include the path in the diagnostic.
// FIXME: Include the import location for the conflicting module.
Diag(ModuleImportLoc, diag::warn_module_conflict)
<< Path[0]->getFullModuleName()
<< Conflict->getFullModuleName()
<< Message;
});
// Add this module to the imports list of the currently-built submodule.
if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
BuildingSubmoduleStack.back().M->Imports.insert(M);
}
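/// For example, given the adjacent narrow literals "abc" "def", the tokens
/// are lexed (optionally macro-expanded), concatenated, and returned in
/// \p String as "abcdef".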
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag,
bool AllowMacroExpansion) {
// We need at least one string literal.
if (Result.isNot(tok::string_literal)) {
Diag(Result, diag::err_expected_string_literal)
<< /*Source='in...'*/0 << DiagnosticTag;
return false;
}
// Lex string literal tokens, optionally with macro expansion.
SmallVector<Token, 4> StrToks;
do {
StrToks.push_back(Result);
if (Result.hasUDSuffix())
Diag(Result, diag::err_invalid_string_udl);
if (AllowMacroExpansion)
Lex(Result);
else
LexUnexpandedToken(Result);
} while (Result.is(tok::string_literal));
// Concatenate and parse the strings.
StringLiteralParser Literal(StrToks, *this);
assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return false;
if (Literal.Pascal) {
Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
<< /*Source='in...'*/0 << DiagnosticTag;
return false;
}
String = Literal.GetString();
return true;
}
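/// For example, a numeric-constant token spelled "0x2A" is consumed and
/// yields Value == 42; literals with user-defined suffixes are rejected.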
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
assert(Tok.is(tok::numeric_constant));
SmallString<8> IntegerBuffer;
bool NumberInvalid = false;
StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
if (NumberInvalid)
return false;
NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
return false;
llvm::APInt APVal(64, 0);
if (Literal.GetIntegerValue(APVal))
return false;
Lex(Tok);
Value = APVal.getLimitedValue();
return true;
}
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
assert(Handler && "NULL comment handler");
assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
CommentHandlers.end() && "Comment handler already registered");
CommentHandlers.push_back(Handler);
}
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
std::vector<CommentHandler *>::iterator Pos
= std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
assert(Pos != CommentHandlers.end() && "Comment handler not registered");
CommentHandlers.erase(Pos);
}
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
bool AnyPendingTokens = false;
for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
HEnd = CommentHandlers.end();
H != HEnd; ++H) {
if ((*H)->HandleComment(*this, Comment))
AnyPendingTokens = true;
}
if (!AnyPendingTokens || getCommentRetentionState())
return false;
Lex(result);
return true;
}
ModuleLoader::~ModuleLoader() { }
CommentHandler::~CommentHandler() { }
CodeCompletionHandler::~CodeCompletionHandler() { }
void Preprocessor::createPreprocessingRecord() {
if (Record)
return;
Record = new PreprocessingRecord(getSourceManager());
addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
}
Index: head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp (revision 322855)
@@ -1,2248 +1,2246 @@
//===--- Parser.cpp - C Language Family Parser ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Parser interfaces.
//
//===----------------------------------------------------------------------===//
#include "clang/Parse/Parser.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/Parse/ParseDiagnostic.h"
#include "clang/Parse/RAIIObjectsForParser.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
using namespace clang;
namespace {
/// \brief A comment handler that passes comments found by the preprocessor
/// to the parser action.
class ActionCommentHandler : public CommentHandler {
Sema &S;
public:
explicit ActionCommentHandler(Sema &S) : S(S) { }
bool HandleComment(Preprocessor &PP, SourceRange Comment) override {
S.ActOnComment(Comment);
return false;
}
};
} // end anonymous namespace
IdentifierInfo *Parser::getSEHExceptKeyword() {
// __except is accepted as a (contextual) keyword
if (!Ident__except && (getLangOpts().MicrosoftExt || getLangOpts().Borland))
Ident__except = PP.getIdentifierInfo("__except");
return Ident__except;
}
Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies)
: PP(pp), Actions(actions), Diags(PP.getDiagnostics()),
GreaterThanIsOperator(true), ColonIsSacred(false),
InMessageExpression(false), TemplateParameterDepth(0),
ParsingInObjCContainer(false) {
SkipFunctionBodies = pp.isCodeCompletionEnabled() || skipFunctionBodies;
Tok.startToken();
Tok.setKind(tok::eof);
Actions.CurScope = nullptr;
NumCachedScopes = 0;
CurParsedObjCImpl = nullptr;
// Add #pragma handlers. These are removed and destroyed in the
// destructor.
initializePragmaHandlers();
CommentSemaHandler.reset(new ActionCommentHandler(actions));
PP.addCommentHandler(CommentSemaHandler.get());
PP.setCodeCompletionHandler(*this);
}
DiagnosticBuilder Parser::Diag(SourceLocation Loc, unsigned DiagID) {
return Diags.Report(Loc, DiagID);
}
DiagnosticBuilder Parser::Diag(const Token &Tok, unsigned DiagID) {
return Diag(Tok.getLocation(), DiagID);
}
/// \brief Emits a diagnostic suggesting parentheses surrounding a
/// given range.
///
/// \param Loc The location where we'll emit the diagnostic.
/// \param DK The kind of diagnostic to emit.
/// \param ParenRange Source range enclosing code that should be parenthesized.
void Parser::SuggestParentheses(SourceLocation Loc, unsigned DK,
SourceRange ParenRange) {
SourceLocation EndLoc = PP.getLocForEndOfToken(ParenRange.getEnd());
if (!ParenRange.getEnd().isFileID() || EndLoc.isInvalid()) {
// We can't display the parentheses, so just emit the
// warning/error and return.
Diag(Loc, DK);
return;
}
Diag(Loc, DK)
<< FixItHint::CreateInsertion(ParenRange.getBegin(), "(")
<< FixItHint::CreateInsertion(EndLoc, ")");
}
static bool IsCommonTypo(tok::TokenKind ExpectedTok, const Token &Tok) {
switch (ExpectedTok) {
case tok::semi:
return Tok.is(tok::colon) || Tok.is(tok::comma); // : or , for ;
default: return false;
}
}
bool Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID,
StringRef Msg) {
if (Tok.is(ExpectedTok) || Tok.is(tok::code_completion)) {
ConsumeAnyToken();
return false;
}
// Detect common single-character typos and resume.
if (IsCommonTypo(ExpectedTok, Tok)) {
SourceLocation Loc = Tok.getLocation();
{
DiagnosticBuilder DB = Diag(Loc, DiagID);
DB << FixItHint::CreateReplacement(
SourceRange(Loc), tok::getPunctuatorSpelling(ExpectedTok));
if (DiagID == diag::err_expected)
DB << ExpectedTok;
else if (DiagID == diag::err_expected_after)
DB << Msg << ExpectedTok;
else
DB << Msg;
}
// Pretend there wasn't a problem.
ConsumeAnyToken();
return false;
}
SourceLocation EndLoc = PP.getLocForEndOfToken(PrevTokLocation);
const char *Spelling = nullptr;
if (EndLoc.isValid())
Spelling = tok::getPunctuatorSpelling(ExpectedTok);
DiagnosticBuilder DB =
Spelling
? Diag(EndLoc, DiagID) << FixItHint::CreateInsertion(EndLoc, Spelling)
: Diag(Tok, DiagID);
if (DiagID == diag::err_expected)
DB << ExpectedTok;
else if (DiagID == diag::err_expected_after)
DB << Msg << ExpectedTok;
else
DB << Msg;
return true;
}
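/// For example, given the (hypothetical) input "x = f() );", the stray ')'
/// before the ';' is diagnosed with a removal fix-it, both tokens are
/// consumed, and parsing continues as if the ';' alone had been written.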
bool Parser::ExpectAndConsumeSemi(unsigned DiagID) {
if (TryConsumeToken(tok::semi))
return false;
if (Tok.is(tok::code_completion)) {
handleUnexpectedCodeCompletionToken();
return false;
}
if ((Tok.is(tok::r_paren) || Tok.is(tok::r_square)) &&
NextToken().is(tok::semi)) {
Diag(Tok, diag::err_extraneous_token_before_semi)
<< PP.getSpelling(Tok)
<< FixItHint::CreateRemoval(Tok.getLocation());
ConsumeAnyToken(); // The ')' or ']'.
ConsumeToken(); // The ';'.
return false;
}
return ExpectAndConsume(tok::semi, DiagID);
}
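/// For example, after "struct S {};;" at namespace scope, the redundant
/// second ';' is consumed here and diagnosed (as an extension before C++11).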
void Parser::ConsumeExtraSemi(ExtraSemiKind Kind, unsigned TST) {
if (!Tok.is(tok::semi)) return;
bool HadMultipleSemis = false;
SourceLocation StartLoc = Tok.getLocation();
SourceLocation EndLoc = Tok.getLocation();
ConsumeToken();
while (Tok.is(tok::semi) && !Tok.isAtStartOfLine()) {
HadMultipleSemis = true;
EndLoc = Tok.getLocation();
ConsumeToken();
}
// C++11 allows extra semicolons at namespace scope, but not in any of the
// other contexts.
if (Kind == OutsideFunction && getLangOpts().CPlusPlus) {
if (getLangOpts().CPlusPlus11)
Diag(StartLoc, diag::warn_cxx98_compat_top_level_semi)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
else
Diag(StartLoc, diag::ext_extra_semi_cxx11)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
return;
}
if (Kind != AfterMemberFunctionDefinition || HadMultipleSemis)
Diag(StartLoc, diag::ext_extra_semi)
<< Kind << DeclSpec::getSpecifierName((DeclSpec::TST)TST,
Actions.getASTContext().getPrintingPolicy())
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
else
// A single semicolon is valid after a member function definition.
Diag(StartLoc, diag::warn_extra_semi_after_mem_fn_def)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
}
bool Parser::expectIdentifier() {
if (Tok.is(tok::identifier))
return false;
if (const auto *II = Tok.getIdentifierInfo()) {
if (II->isCPlusPlusKeyword(getLangOpts())) {
Diag(Tok, diag::err_expected_token_instead_of_objcxx_keyword)
<< tok::identifier << Tok.getIdentifierInfo();
// Objective-C++: Recover by treating this keyword as a valid identifier.
return false;
}
}
Diag(Tok, diag::err_expected) << tok::identifier;
return true;
}
//===----------------------------------------------------------------------===//
// Error recovery.
//===----------------------------------------------------------------------===//
static bool HasFlagsSet(Parser::SkipUntilFlags L, Parser::SkipUntilFlags R) {
return (static_cast<unsigned>(L) & static_cast<unsigned>(R)) != 0;
}
/// SkipUntil - Read tokens until we get to the specified token, then consume
/// it (unless the StopBeforeMatch flag is set). Because we cannot guarantee
/// that the token will ever occur, this skips to the next token, or to some
/// likely good stopping point. If the StopAtSemi flag is set, skipping will
/// stop at a ';' character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
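///
/// For example, SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch) scans
/// forward to the next ')' at the current nesting level, stops early at any
/// ';', and leaves the ')' itself unconsumed for the caller.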
bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
// We always want this function to skip at least one token if the first token
// isn't T and if not at EOF.
bool isFirstTokenSkipped = true;
while (1) {
// If we found one of the tokens, stop and return true.
for (unsigned i = 0, NumToks = Toks.size(); i != NumToks; ++i) {
if (Tok.is(Toks[i])) {
if (HasFlagsSet(Flags, StopBeforeMatch)) {
// Noop, don't consume the token.
} else {
ConsumeAnyToken();
}
return true;
}
}
// Important special case: The caller has given up and just wants us to
// skip the rest of the file. Do this without recursing, since we can
// get here precisely because the caller detected too much recursion.
if (Toks.size() == 1 && Toks[0] == tok::eof &&
!HasFlagsSet(Flags, StopAtSemi) &&
!HasFlagsSet(Flags, StopAtCodeCompletion)) {
while (Tok.isNot(tok::eof))
ConsumeAnyToken();
return true;
}
switch (Tok.getKind()) {
case tok::eof:
// Ran out of tokens.
return false;
case tok::annot_pragma_openmp:
case tok::annot_pragma_openmp_end:
// Stop before an OpenMP pragma boundary.
case tok::annot_module_begin:
case tok::annot_module_end:
case tok::annot_module_include:
// Stop before we change submodules. They generally indicate a "good"
// place to pick up parsing again (except in the special case where
// we're trying to skip to EOF).
return false;
case tok::code_completion:
if (!HasFlagsSet(Flags, StopAtCodeCompletion))
handleUnexpectedCodeCompletionToken();
return false;
case tok::l_paren:
// Recursively skip properly-nested parens.
ConsumeParen();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_paren, StopAtCodeCompletion);
else
SkipUntil(tok::r_paren);
break;
case tok::l_square:
// Recursively skip properly-nested square brackets.
ConsumeBracket();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_square, StopAtCodeCompletion);
else
SkipUntil(tok::r_square);
break;
case tok::l_brace:
// Recursively skip properly-nested braces.
ConsumeBrace();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_brace, StopAtCodeCompletion);
else
SkipUntil(tok::r_brace);
break;
// Okay, we found a ']' or '}' or ')', which we think should be balanced.
// Since the user wasn't looking for this token (if they were, it would
// already be handled), this isn't balanced. If there is a LHS token at a
// higher level, we will assume that this matches the unbalanced token
// and return it. Otherwise, this is a spurious RHS token, which we skip.
case tok::r_paren:
if (ParenCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeParen();
break;
case tok::r_square:
if (BracketCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeBracket();
break;
case tok::r_brace:
if (BraceCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeBrace();
break;
case tok::semi:
if (HasFlagsSet(Flags, StopAtSemi))
return false;
// FALL THROUGH.
default:
// Skip this token.
ConsumeAnyToken();
break;
}
isFirstTokenSkipped = false;
}
}
//===----------------------------------------------------------------------===//
// Scope manipulation
//===----------------------------------------------------------------------===//
/// EnterScope - Start a new scope.
void Parser::EnterScope(unsigned ScopeFlags) {
if (NumCachedScopes) {
Scope *N = ScopeCache[--NumCachedScopes];
N->Init(getCurScope(), ScopeFlags);
Actions.CurScope = N;
} else {
Actions.CurScope = new Scope(getCurScope(), ScopeFlags, Diags);
}
}
/// ExitScope - Pop a scope off the scope stack.
void Parser::ExitScope() {
assert(getCurScope() && "Scope imbalance!");
// Inform the actions module that this scope is going away if there are any
// decls in it.
Actions.ActOnPopScope(Tok.getLocation(), getCurScope());
Scope *OldScope = getCurScope();
Actions.CurScope = OldScope->getParent();
if (NumCachedScopes == ScopeCacheSize)
delete OldScope;
else
ScopeCache[NumCachedScopes++] = OldScope;
}
/// Set the flags for the current scope to ScopeFlags. If ManageFlags is false,
/// this object does nothing.
Parser::ParseScopeFlags::ParseScopeFlags(Parser *Self, unsigned ScopeFlags,
bool ManageFlags)
: CurScope(ManageFlags ? Self->getCurScope() : nullptr) {
if (CurScope) {
OldFlags = CurScope->getFlags();
CurScope->setFlags(ScopeFlags);
}
}
/// Restore the flags for the current scope to what they were before this
/// object overrode them.
Parser::ParseScopeFlags::~ParseScopeFlags() {
if (CurScope)
CurScope->setFlags(OldFlags);
}
//===----------------------------------------------------------------------===//
// C99 6.9: External Definitions.
//===----------------------------------------------------------------------===//
Parser::~Parser() {
// If we still have scopes active, delete the scope tree.
delete getCurScope();
Actions.CurScope = nullptr;
// Free the scope cache.
for (unsigned i = 0, e = NumCachedScopes; i != e; ++i)
delete ScopeCache[i];
resetPragmaHandlers();
PP.removeCommentHandler(CommentSemaHandler.get());
PP.clearCodeCompletionHandler();
if (getLangOpts().DelayedTemplateParsing &&
!PP.isIncrementalProcessingEnabled() && !TemplateIds.empty()) {
// If an ASTConsumer parsed delay-parsed templates in their
// HandleTranslationUnit() method, TemplateIds created there were not
// guarded by a DestroyTemplateIdAnnotationsRAIIObj object in
// ParseTopLevelDecl(). Destroy them here.
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
}
assert(TemplateIds.empty() && "Still alive TemplateIdAnnotations around?");
}
/// Initialize - Warm up the parser.
///
void Parser::Initialize() {
// Create the translation unit scope. Install it as the current scope.
assert(getCurScope() == nullptr && "A scope is already active?");
EnterScope(Scope::DeclScope);
Actions.ActOnTranslationUnitScope(getCurScope());
// Initialization for Objective-C context sensitive keywords recognition.
// Referenced in Parser::ParseObjCTypeQualifierList.
if (getLangOpts().ObjC1) {
ObjCTypeQuals[objc_in] = &PP.getIdentifierTable().get("in");
ObjCTypeQuals[objc_out] = &PP.getIdentifierTable().get("out");
ObjCTypeQuals[objc_inout] = &PP.getIdentifierTable().get("inout");
ObjCTypeQuals[objc_oneway] = &PP.getIdentifierTable().get("oneway");
ObjCTypeQuals[objc_bycopy] = &PP.getIdentifierTable().get("bycopy");
ObjCTypeQuals[objc_byref] = &PP.getIdentifierTable().get("byref");
ObjCTypeQuals[objc_nonnull] = &PP.getIdentifierTable().get("nonnull");
ObjCTypeQuals[objc_nullable] = &PP.getIdentifierTable().get("nullable");
ObjCTypeQuals[objc_null_unspecified]
= &PP.getIdentifierTable().get("null_unspecified");
}
Ident_instancetype = nullptr;
Ident_final = nullptr;
Ident_sealed = nullptr;
Ident_override = nullptr;
Ident_GNU_final = nullptr;
Ident_super = &PP.getIdentifierTable().get("super");
Ident_vector = nullptr;
Ident_bool = nullptr;
Ident_pixel = nullptr;
if (getLangOpts().AltiVec || getLangOpts().ZVector) {
Ident_vector = &PP.getIdentifierTable().get("vector");
Ident_bool = &PP.getIdentifierTable().get("bool");
}
if (getLangOpts().AltiVec)
Ident_pixel = &PP.getIdentifierTable().get("pixel");
Ident_introduced = nullptr;
Ident_deprecated = nullptr;
Ident_obsoleted = nullptr;
Ident_unavailable = nullptr;
Ident_strict = nullptr;
Ident_replacement = nullptr;
Ident_language = Ident_defined_in = Ident_generated_declaration = nullptr;
Ident__except = nullptr;
Ident__exception_code = Ident__exception_info = nullptr;
Ident__abnormal_termination = Ident___exception_code = nullptr;
Ident___exception_info = Ident___abnormal_termination = nullptr;
Ident_GetExceptionCode = Ident_GetExceptionInfo = nullptr;
Ident_AbnormalTermination = nullptr;
if (getLangOpts().Borland) {
Ident__exception_info = PP.getIdentifierInfo("_exception_info");
Ident___exception_info = PP.getIdentifierInfo("__exception_info");
Ident_GetExceptionInfo = PP.getIdentifierInfo("GetExceptionInformation");
Ident__exception_code = PP.getIdentifierInfo("_exception_code");
Ident___exception_code = PP.getIdentifierInfo("__exception_code");
Ident_GetExceptionCode = PP.getIdentifierInfo("GetExceptionCode");
Ident__abnormal_termination = PP.getIdentifierInfo("_abnormal_termination");
Ident___abnormal_termination = PP.getIdentifierInfo("__abnormal_termination");
Ident_AbnormalTermination = PP.getIdentifierInfo("AbnormalTermination");
PP.SetPoisonReason(Ident__exception_code, diag::err_seh___except_block);
PP.SetPoisonReason(Ident___exception_code, diag::err_seh___except_block);
PP.SetPoisonReason(Ident_GetExceptionCode, diag::err_seh___except_block);
PP.SetPoisonReason(Ident__exception_info, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident___exception_info, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident_GetExceptionInfo, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident__abnormal_termination, diag::err_seh___finally_block);
PP.SetPoisonReason(Ident___abnormal_termination, diag::err_seh___finally_block);
PP.SetPoisonReason(Ident_AbnormalTermination, diag::err_seh___finally_block);
}
Actions.Initialize();
// Prime the lexer look-ahead.
ConsumeToken();
-
- PP.replayPreambleConditionalStack();
}
void Parser::LateTemplateParserCleanupCallback(void *P) {
// While this RAII helper doesn't bracket any actual work, the destructor will
// clean up annotations that were created during ActOnEndOfTranslationUnit
// when incremental processing is enabled.
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(((Parser *)P)->TemplateIds);
}
bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) {
Actions.ActOnStartOfTranslationUnit();
// C11 6.9p1 says translation units must have at least one top-level
// declaration. C++ doesn't have this restriction. We also don't want to
// complain if we have a precompiled header, although technically if the PCH
// is empty we should still emit the (pedantic) diagnostic.
bool NoTopLevelDecls = ParseTopLevelDecl(Result);
if (NoTopLevelDecls && !Actions.getASTContext().getExternalSource() &&
!getLangOpts().CPlusPlus)
Diag(diag::ext_empty_translation_unit);
return NoTopLevelDecls;
}
/// ParseTopLevelDecl - Parse one top-level declaration, return whatever the
/// action tells us to. This returns true if the EOF was encountered.
bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result) {
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
// Skip over the EOF token, flagging end of previous input for incremental
// processing.
if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
ConsumeToken();
Result = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_unused:
HandlePragmaUnused();
return false;
case tok::kw_import:
Result = ParseModuleImport(SourceLocation());
return false;
case tok::kw_export:
if (NextToken().isNot(tok::kw_module))
break;
LLVM_FALLTHROUGH;
case tok::kw_module:
Result = ParseModuleDecl();
return false;
case tok::annot_module_include:
Actions.ActOnModuleInclude(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_module_begin:
Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_module_end:
Actions.ActOnModuleEnd(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_pragma_attribute:
HandlePragmaAttribute();
return false;
case tok::eof:
// Late template parsing can begin.
if (getLangOpts().DelayedTemplateParsing)
Actions.SetLateTemplateParser(LateTemplateParserCallback,
PP.isIncrementalProcessingEnabled() ?
LateTemplateParserCleanupCallback : nullptr,
this);
if (!PP.isIncrementalProcessingEnabled())
Actions.ActOnEndOfTranslationUnit();
// else don't tell Sema that we ended parsing: more input might come.
return true;
default:
break;
}
ParsedAttributesWithRange attrs(AttrFactory);
MaybeParseCXX11Attributes(attrs);
Result = ParseExternalDeclaration(attrs);
return false;
}
/// ParseExternalDeclaration:
///
/// external-declaration: [C99 6.9], declaration: [C++ dcl.dcl]
/// function-definition
/// declaration
/// [GNU] asm-definition
/// [GNU] __extension__ external-declaration
/// [OBJC] objc-class-definition
/// [OBJC] objc-class-declaration
/// [OBJC] objc-alias-declaration
/// [OBJC] objc-protocol-definition
/// [OBJC] objc-method-definition
/// [OBJC] @end
/// [C++] linkage-specification
/// [GNU] asm-definition:
/// simple-asm-expr ';'
/// [C++11] empty-declaration
/// [C++11] attribute-declaration
///
/// [C++11] empty-declaration:
/// ';'
///
/// [C++0x/GNU] 'extern' 'template' declaration
Parser::DeclGroupPtrTy
Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
ParsingDeclSpec *DS) {
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
ParenBraceBracketBalancer BalancerRAIIObj(*this);
if (PP.isCodeCompletionReached()) {
cutOffParsing();
return nullptr;
}
Decl *SingleDecl = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_vis:
HandlePragmaVisibility();
return nullptr;
case tok::annot_pragma_pack:
HandlePragmaPack();
return nullptr;
case tok::annot_pragma_msstruct:
HandlePragmaMSStruct();
return nullptr;
case tok::annot_pragma_align:
HandlePragmaAlign();
return nullptr;
case tok::annot_pragma_weak:
HandlePragmaWeak();
return nullptr;
case tok::annot_pragma_weakalias:
HandlePragmaWeakAlias();
return nullptr;
case tok::annot_pragma_redefine_extname:
HandlePragmaRedefineExtname();
return nullptr;
case tok::annot_pragma_fp_contract:
HandlePragmaFPContract();
return nullptr;
case tok::annot_pragma_fp:
HandlePragmaFP();
break;
case tok::annot_pragma_opencl_extension:
HandlePragmaOpenCLExtension();
return nullptr;
case tok::annot_pragma_openmp: {
AccessSpecifier AS = AS_none;
return ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, attrs);
}
case tok::annot_pragma_ms_pointers_to_members:
HandlePragmaMSPointersToMembers();
return nullptr;
case tok::annot_pragma_ms_vtordisp:
HandlePragmaMSVtorDisp();
return nullptr;
case tok::annot_pragma_ms_pragma:
HandlePragmaMSPragma();
return nullptr;
case tok::annot_pragma_dump:
HandlePragmaDump();
return nullptr;
case tok::semi:
// Either a C++11 empty-declaration or attribute-declaration.
SingleDecl = Actions.ActOnEmptyDeclaration(getCurScope(),
attrs.getList(),
Tok.getLocation());
ConsumeExtraSemi(OutsideFunction);
break;
case tok::r_brace:
Diag(Tok, diag::err_extraneous_closing_brace);
ConsumeBrace();
return nullptr;
case tok::eof:
Diag(Tok, diag::err_expected_external_declaration);
return nullptr;
case tok::kw___extension__: {
// __extension__ silences extension warnings in the subexpression.
ExtensionRAIIObject O(Diags); // Use RAII to do this.
ConsumeToken();
return ParseExternalDeclaration(attrs);
}
case tok::kw_asm: {
ProhibitAttributes(attrs);
SourceLocation StartLoc = Tok.getLocation();
SourceLocation EndLoc;
ExprResult Result(ParseSimpleAsm(&EndLoc));
// Check if GNU-style InlineAsm is disabled.
// Empty asm string is allowed because it will not introduce
// any assembly code.
if (!(getLangOpts().GNUAsm || Result.isInvalid())) {
const auto *SL = cast<StringLiteral>(Result.get());
if (!SL->getString().trim().empty())
Diag(StartLoc, diag::err_gnu_inline_asm_disabled);
}
ExpectAndConsume(tok::semi, diag::err_expected_after,
"top-level asm block");
if (Result.isInvalid())
return nullptr;
SingleDecl = Actions.ActOnFileScopeAsmDecl(Result.get(), StartLoc, EndLoc);
break;
}
case tok::at:
return ParseObjCAtDirectives();
case tok::minus:
case tok::plus:
if (!getLangOpts().ObjC1) {
Diag(Tok, diag::err_expected_external_declaration);
ConsumeToken();
return nullptr;
}
SingleDecl = ParseObjCMethodDefinition();
break;
case tok::code_completion:
Actions.CodeCompleteOrdinaryName(getCurScope(),
CurParsedObjCImpl? Sema::PCC_ObjCImplementation
: Sema::PCC_Namespace);
cutOffParsing();
return nullptr;
case tok::kw_export:
if (getLangOpts().ModulesTS) {
SingleDecl = ParseExportDeclaration();
break;
}
// This must be 'export template'. Parse it so we can diagnose our lack
// of support.
LLVM_FALLTHROUGH;
case tok::kw_using:
case tok::kw_namespace:
case tok::kw_typedef:
case tok::kw_template:
case tok::kw_static_assert:
case tok::kw__Static_assert:
// A function definition cannot start with any of these keywords.
{
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
case tok::kw_static:
// Parse (then ignore) 'static' prior to a template instantiation. This is
// a GCC extension that we intentionally do not support.
if (getLangOpts().CPlusPlus && NextToken().is(tok::kw_template)) {
Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
<< 0;
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
goto dont_know;
case tok::kw_inline:
if (getLangOpts().CPlusPlus) {
tok::TokenKind NextKind = NextToken().getKind();
// Inline namespaces. Allowed as an extension even in C++03.
if (NextKind == tok::kw_namespace) {
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
// Parse (then ignore) 'inline' prior to a template instantiation. This is
// a GCC extension that we intentionally do not support.
if (NextKind == tok::kw_template) {
Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
<< 1;
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
}
goto dont_know;
case tok::kw_extern:
if (getLangOpts().CPlusPlus && NextToken().is(tok::kw_template)) {
// Extern templates
SourceLocation ExternLoc = ConsumeToken();
SourceLocation TemplateLoc = ConsumeToken();
Diag(ExternLoc, getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_extern_template :
diag::ext_extern_template) << SourceRange(ExternLoc, TemplateLoc);
SourceLocation DeclEnd;
return Actions.ConvertDeclToDeclGroup(
ParseExplicitInstantiation(Declarator::FileContext,
ExternLoc, TemplateLoc, DeclEnd));
}
goto dont_know;
case tok::kw___if_exists:
case tok::kw___if_not_exists:
ParseMicrosoftIfExistsExternalDeclaration();
return nullptr;
case tok::kw_module:
Diag(Tok, diag::err_unexpected_module_decl);
SkipUntil(tok::semi);
return nullptr;
default:
dont_know:
if (Tok.isEditorPlaceholder()) {
ConsumeToken();
return nullptr;
}
// We can't tell whether this is a function-definition or declaration yet.
return ParseDeclarationOrFunctionDefinition(attrs, DS);
}
// This routine returns a DeclGroup, if the thing we parsed only contains a
// single decl, convert it now.
return Actions.ConvertDeclToDeclGroup(SingleDecl);
}
/// \brief Determine whether the current token, if it occurs after a
/// declarator, continues a declaration or declaration list.
bool Parser::isDeclarationAfterDeclarator() {
// Check for '= delete' or '= default'
if (getLangOpts().CPlusPlus && Tok.is(tok::equal)) {
const Token &KW = NextToken();
if (KW.is(tok::kw_default) || KW.is(tok::kw_delete))
return false;
}
return Tok.is(tok::equal) || // int X()= -> not a function def
Tok.is(tok::comma) || // int X(), -> not a function def
Tok.is(tok::semi) || // int X(); -> not a function def
Tok.is(tok::kw_asm) || // int X() __asm__ -> not a function def
Tok.is(tok::kw___attribute) || // int X() __attr__ -> not a function def
(getLangOpts().CPlusPlus &&
Tok.is(tok::l_paren)); // int X(0) -> not a function def [C++]
}
/// \brief Determine whether the current token, if it occurs after a
/// declarator, indicates the start of a function definition.
bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) {
assert(Declarator.isFunctionDeclarator() && "Isn't a function declarator");
if (Tok.is(tok::l_brace)) // int X() {}
return true;
// Handle K&R C argument lists: int X(f) int f; {}
if (!getLangOpts().CPlusPlus &&
Declarator.getFunctionTypeInfo().isKNRPrototype())
return isDeclarationSpecifier();
if (getLangOpts().CPlusPlus && Tok.is(tok::equal)) {
const Token &KW = NextToken();
return KW.is(tok::kw_default) || KW.is(tok::kw_delete);
}
return Tok.is(tok::colon) || // X() : Base() {} (used for ctors)
Tok.is(tok::kw_try); // X() try { ... }
}
/// Parse either a function-definition or a declaration. We can't tell which
/// we have until we read up to the compound-statement in function-definition.
/// TemplateParams, if non-NULL, provides the template parameters when we're
/// parsing a C++ template-declaration.
///
/// function-definition: [C99 6.9.1]
/// decl-specs declarator declaration-list[opt] compound-statement
/// [C90] function-definition: [C99 6.7.1] - implicit int result
/// [C90] decl-specs[opt] declarator declaration-list[opt] compound-statement
///
/// declaration: [C99 6.7]
/// declaration-specifiers init-declarator-list[opt] ';'
/// [!C99] init-declarator-list ';' [TODO: warn in c99 mode]
/// [OMP] threadprivate-directive [TODO]
///
Parser::DeclGroupPtrTy
Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,
ParsingDeclSpec &DS,
AccessSpecifier AS) {
MaybeParseMicrosoftAttributes(DS.getAttributes());
// Parse the common declaration-specifiers piece.
ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS, DSC_top_level);
// If we had a free-standing type definition with a missing semicolon, we
// may get this far before the problem becomes obvious.
if (DS.hasTagDefinition() &&
DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_top_level))
return nullptr;
// C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
// declaration-specifiers init-declarator-list[opt] ';'
if (Tok.is(tok::semi)) {
ProhibitAttributes(attrs);
ConsumeToken();
RecordDecl *AnonRecord = nullptr;
Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
DS, AnonRecord);
DS.complete(TheDecl);
if (getLangOpts().OpenCL)
Actions.setCurrentOpenCLExtensionForDecl(TheDecl);
if (AnonRecord) {
Decl* decls[] = {AnonRecord, TheDecl};
return Actions.BuildDeclaratorGroup(decls);
}
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
DS.takeAttributesFrom(attrs);
// ObjC2 allows prefix attributes on class interfaces and protocols.
// FIXME: This still needs better diagnostics. We should only accept
// attributes here, no types, etc.
if (getLangOpts().ObjC2 && Tok.is(tok::at)) {
SourceLocation AtLoc = ConsumeToken(); // the "@"
if (!Tok.isObjCAtKeyword(tok::objc_interface) &&
!Tok.isObjCAtKeyword(tok::objc_protocol)) {
Diag(Tok, diag::err_objc_unexpected_attr);
SkipUntil(tok::semi); // FIXME: better skip?
return nullptr;
}
DS.abort();
const char *PrevSpec = nullptr;
unsigned DiagID;
if (DS.SetTypeSpecType(DeclSpec::TST_unspecified, AtLoc, PrevSpec, DiagID,
Actions.getASTContext().getPrintingPolicy()))
Diag(AtLoc, DiagID) << PrevSpec;
if (Tok.isObjCAtKeyword(tok::objc_protocol))
return ParseObjCAtProtocolDeclaration(AtLoc, DS.getAttributes());
return Actions.ConvertDeclToDeclGroup(
ParseObjCAtInterfaceDeclaration(AtLoc, DS.getAttributes()));
}
// If the declspec consisted only of 'extern' and we have a string
// literal following it, this must be a C++ linkage specifier like
// 'extern "C"'.
if (getLangOpts().CPlusPlus && isTokenStringLiteral() &&
DS.getStorageClassSpec() == DeclSpec::SCS_extern &&
DS.getParsedSpecifiers() == DeclSpec::PQ_StorageClassSpecifier) {
Decl *TheDecl = ParseLinkage(DS, Declarator::FileContext);
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
return ParseDeclGroup(DS, Declarator::FileContext);
}
Parser::DeclGroupPtrTy
Parser::ParseDeclarationOrFunctionDefinition(ParsedAttributesWithRange &attrs,
ParsingDeclSpec *DS,
AccessSpecifier AS) {
if (DS) {
return ParseDeclOrFunctionDefInternal(attrs, *DS, AS);
} else {
ParsingDeclSpec PDS(*this);
// Must temporarily exit the Objective-C container scope for
// parsing C constructs and re-enter the Objective-C container scope
// afterwards.
ObjCDeclContextSwitch ObjCDC(*this);
return ParseDeclOrFunctionDefInternal(attrs, PDS, AS);
}
}
/// ParseFunctionDefinition - We parsed and verified that the specified
/// Declarator is well formed. If this is a K&R-style function, read the
/// parameters declaration-list, then start the compound-statement.
///
/// function-definition: [C99 6.9.1]
/// decl-specs declarator declaration-list[opt] compound-statement
/// [C90] function-definition: [C99 6.7.1] - implicit int result
/// [C90] decl-specs[opt] declarator declaration-list[opt] compound-statement
/// [C++] function-definition: [C++ 8.4]
/// decl-specifier-seq[opt] declarator ctor-initializer[opt]
/// function-body
/// [C++] function-definition: [C++ 8.4]
/// decl-specifier-seq[opt] declarator function-try-block
///
Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
const ParsedTemplateInfo &TemplateInfo,
LateParsedAttrList *LateParsedAttrs) {
// Poison SEH identifiers so they are flagged as illegal in function bodies.
PoisonSEHIdentifiersRAIIObject PoisonSEHIdentifiers(*this, true);
const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
// If this is C90 and the declspecs were completely missing, fudge in an
// implicit int. We do this here because this is the only place where
// declaration-specifiers are completely optional in the grammar.
if (getLangOpts().ImplicitInt && D.getDeclSpec().isEmpty()) {
const char *PrevSpec;
unsigned DiagID;
const PrintingPolicy &Policy = Actions.getASTContext().getPrintingPolicy();
D.getMutableDeclSpec().SetTypeSpecType(DeclSpec::TST_int,
D.getIdentifierLoc(),
PrevSpec, DiagID,
Policy);
D.SetRangeBegin(D.getDeclSpec().getSourceRange().getBegin());
}
// If this declaration was formed with a K&R-style identifier list for the
// arguments, parse declarations for all of the args next.
// int foo(a,b) int a; float b; {}
if (FTI.isKNRPrototype())
ParseKNRParamDeclarations(D);
// We should have either an opening brace or, in a C++ constructor,
// we may have a colon.
if (Tok.isNot(tok::l_brace) &&
(!getLangOpts().CPlusPlus ||
(Tok.isNot(tok::colon) && Tok.isNot(tok::kw_try) &&
Tok.isNot(tok::equal)))) {
Diag(Tok, diag::err_expected_fn_body);
// Skip over garbage, until we get to '{'. Don't eat the '{'.
SkipUntil(tok::l_brace, StopAtSemi | StopBeforeMatch);
// If we didn't find the '{', bail out.
if (Tok.isNot(tok::l_brace))
return nullptr;
}
// Check to make sure that any normal attributes are allowed to be on
// a definition. Late parsed attributes are checked at the end.
if (Tok.isNot(tok::equal)) {
AttributeList *DtorAttrs = D.getAttributes();
while (DtorAttrs) {
if (DtorAttrs->isKnownToGCC() &&
!DtorAttrs->isCXX11Attribute()) {
Diag(DtorAttrs->getLoc(), diag::warn_attribute_on_function_definition)
<< DtorAttrs->getName();
}
DtorAttrs = DtorAttrs->getNext();
}
}
// In delayed template parsing mode, for function template we consume the
// tokens and store them for late parsing at the end of the translation unit.
if (getLangOpts().DelayedTemplateParsing && Tok.isNot(tok::equal) &&
TemplateInfo.Kind == ParsedTemplateInfo::Template &&
Actions.canDelayFunctionBody(D)) {
MultiTemplateParamsArg TemplateParameterLists(*TemplateInfo.TemplateParams);
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
Scope *ParentScope = getCurScope()->getParent();
D.setFunctionDefinitionKind(FDK_Definition);
Decl *DP = Actions.HandleDeclarator(ParentScope, D,
TemplateParameterLists);
D.complete(DP);
D.getMutableDeclSpec().abort();
if (SkipFunctionBodies && (!DP || Actions.canSkipFunctionBody(DP)) &&
trySkippingFunctionBody()) {
BodyScope.Exit();
return Actions.ActOnSkippedFunctionBody(DP);
}
CachedTokens Toks;
LexTemplateFunctionForLateParsing(Toks);
if (DP) {
FunctionDecl *FnD = DP->getAsFunction();
Actions.CheckForFunctionRedefinition(FnD);
Actions.MarkAsLateParsedTemplate(FnD, DP, Toks);
}
return DP;
}
else if (CurParsedObjCImpl &&
!TemplateInfo.TemplateParams &&
(Tok.is(tok::l_brace) || Tok.is(tok::kw_try) ||
Tok.is(tok::colon)) &&
Actions.CurContext->isTranslationUnit()) {
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
Scope *ParentScope = getCurScope()->getParent();
D.setFunctionDefinitionKind(FDK_Definition);
Decl *FuncDecl = Actions.HandleDeclarator(ParentScope, D,
MultiTemplateParamsArg());
D.complete(FuncDecl);
D.getMutableDeclSpec().abort();
if (FuncDecl) {
// Consume the tokens and store them for later parsing.
StashAwayMethodOrFunctionBodyTokens(FuncDecl);
CurParsedObjCImpl->HasCFunction = true;
return FuncDecl;
}
// FIXME: Should we really fall through here?
}
// Enter a scope for the function body.
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
// Tell the actions module that we have entered a function definition with the
// specified Declarator for the function.
Sema::SkipBodyInfo SkipBody;
Decl *Res = Actions.ActOnStartOfFunctionDef(getCurScope(), D,
TemplateInfo.TemplateParams
? *TemplateInfo.TemplateParams
: MultiTemplateParamsArg(),
&SkipBody);
if (SkipBody.ShouldSkip) {
SkipFunctionBody();
return Res;
}
// Break out of the ParsingDeclarator context before we parse the body.
D.complete(Res);
// Break out of the ParsingDeclSpec context, too. This const_cast is
// safe because we're always the sole owner.
D.getMutableDeclSpec().abort();
if (TryConsumeToken(tok::equal)) {
assert(getLangOpts().CPlusPlus && "Only C++ function definitions have '='");
bool Delete = false;
SourceLocation KWLoc;
if (TryConsumeToken(tok::kw_delete, KWLoc)) {
Diag(KWLoc, getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_defaulted_deleted_function
: diag::ext_defaulted_deleted_function)
<< 1 /* deleted */;
Actions.SetDeclDeleted(Res, KWLoc);
Delete = true;
} else if (TryConsumeToken(tok::kw_default, KWLoc)) {
Diag(KWLoc, getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_defaulted_deleted_function
: diag::ext_defaulted_deleted_function)
<< 0 /* defaulted */;
Actions.SetDeclDefaulted(Res, KWLoc);
} else {
llvm_unreachable("function definition after = not 'delete' or 'default'");
}
if (Tok.is(tok::comma)) {
Diag(KWLoc, diag::err_default_delete_in_multiple_declaration)
<< Delete;
SkipUntil(tok::semi);
} else if (ExpectAndConsume(tok::semi, diag::err_expected_after,
Delete ? "delete" : "default")) {
SkipUntil(tok::semi);
}
Stmt *GeneratedBody = Res ? Res->getBody() : nullptr;
Actions.ActOnFinishFunctionBody(Res, GeneratedBody, false);
return Res;
}
if (SkipFunctionBodies && (!Res || Actions.canSkipFunctionBody(Res)) &&
trySkippingFunctionBody()) {
BodyScope.Exit();
Actions.ActOnSkippedFunctionBody(Res);
return Actions.ActOnFinishFunctionBody(Res, nullptr, false);
}
if (Tok.is(tok::kw_try))
return ParseFunctionTryBlock(Res, BodyScope);
// If we have a colon, then we're probably parsing a C++
// ctor-initializer.
if (Tok.is(tok::colon)) {
ParseConstructorInitializer(Res);
// Recover from error.
if (!Tok.is(tok::l_brace)) {
BodyScope.Exit();
Actions.ActOnFinishFunctionBody(Res, nullptr);
return Res;
}
} else
Actions.ActOnDefaultCtorInitializers(Res);
// Late attributes are parsed in the same scope as the function body.
if (LateParsedAttrs)
ParseLexedAttributeList(*LateParsedAttrs, Res, false, true);
return ParseFunctionStatementBody(Res, BodyScope);
}
void Parser::SkipFunctionBody() {
if (Tok.is(tok::equal)) {
SkipUntil(tok::semi);
return;
}
bool IsFunctionTryBlock = Tok.is(tok::kw_try);
if (IsFunctionTryBlock)
ConsumeToken();
CachedTokens Skipped;
if (ConsumeAndStoreFunctionPrologue(Skipped))
SkipMalformedDecl();
else {
SkipUntil(tok::r_brace);
while (IsFunctionTryBlock && Tok.is(tok::kw_catch)) {
SkipUntil(tok::l_brace);
SkipUntil(tok::r_brace);
}
}
}
/// ParseKNRParamDeclarations - Parse 'declaration-list[opt]' which provides
/// types for a function with a K&R-style identifier list for arguments.
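///
/// For example, for "int f(a, b) int a; float b; { ... }" this parses the
/// declaration list "int a; float b;" and attaches each type to the matching
/// identifier in f's parameter list.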
void Parser::ParseKNRParamDeclarations(Declarator &D) {
// We know that the top-level of this declarator is a function.
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
// Enter function-declaration scope, limiting any declarators to the
// function prototype scope, including parameter declarators.
ParseScope PrototypeScope(this, Scope::FunctionPrototypeScope |
Scope::FunctionDeclarationScope | Scope::DeclScope);
// Read all the argument declarations.
while (isDeclarationSpecifier()) {
SourceLocation DSStart = Tok.getLocation();
// Parse the common declaration-specifiers piece.
DeclSpec DS(AttrFactory);
ParseDeclarationSpecifiers(DS);
// C99 6.9.1p6: 'each declaration in the declaration list shall have at
// least one declarator'.
// NOTE: GCC just makes this an ext-warn. It's not clear what it does with
// the declarations though. It's trivial to ignore them, really hard to do
// anything else with them.
if (TryConsumeToken(tok::semi)) {
Diag(DSStart, diag::err_declaration_does_not_declare_param);
continue;
}
// C99 6.9.1p6: Declarations shall contain no storage-class specifiers other
// than register.
if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified &&
DS.getStorageClassSpec() != DeclSpec::SCS_register) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl);
DS.ClearStorageClassSpecs();
}
if (DS.getThreadStorageClassSpec() != DeclSpec::TSCS_unspecified) {
Diag(DS.getThreadStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl);
DS.ClearStorageClassSpecs();
}
// Parse the first declarator attached to this declspec.
Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext);
ParseDeclarator(ParmDeclarator);
// Handle the full declarator list.
while (1) {
// If attributes are present, parse them.
MaybeParseGNUAttributes(ParmDeclarator);
// Ask the actions module to compute the type for this declarator.
Decl *Param =
Actions.ActOnParamDeclarator(getCurScope(), ParmDeclarator);
if (Param &&
// A missing identifier has already been diagnosed.
ParmDeclarator.getIdentifier()) {
// Scan the argument list looking for the correct param to apply this
// type.
for (unsigned i = 0; ; ++i) {
// C99 6.9.1p6: those declarators shall declare only identifiers from
// the identifier list.
if (i == FTI.NumParams) {
Diag(ParmDeclarator.getIdentifierLoc(), diag::err_no_matching_param)
<< ParmDeclarator.getIdentifier();
break;
}
if (FTI.Params[i].Ident == ParmDeclarator.getIdentifier()) {
// Reject redefinitions of parameters.
if (FTI.Params[i].Param) {
Diag(ParmDeclarator.getIdentifierLoc(),
diag::err_param_redefinition)
<< ParmDeclarator.getIdentifier();
} else {
FTI.Params[i].Param = Param;
}
break;
}
}
}
// If we don't have a comma, it is either the end of the list (a ';') or
// an error, bail out.
if (Tok.isNot(tok::comma))
break;
ParmDeclarator.clear();
// Consume the comma.
ParmDeclarator.setCommaLoc(ConsumeToken());
// Parse the next declarator.
ParseDeclarator(ParmDeclarator);
}
// Consume ';' and continue parsing.
if (!ExpectAndConsumeSemi(diag::err_expected_semi_declaration))
continue;
// Otherwise recover by skipping to next semi or mandatory function body.
if (SkipUntil(tok::l_brace, StopAtSemi | StopBeforeMatch))
break;
TryConsumeToken(tok::semi);
}
// The actions module must verify that all arguments were declared.
Actions.ActOnFinishKNRParamDeclarations(getCurScope(), D, Tok.getLocation());
}
/// ParseAsmStringLiteral - This is just a normal string-literal, but is not
/// allowed to be a wide string, and is not subject to character translation.
///
/// [GNU] asm-string-literal:
/// string-literal
///
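/// For example, in 'asm("nop")' this accepts the literal "nop", while a wide
/// literal such as L"nop" is rejected with a diagnostic.
///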
ExprResult Parser::ParseAsmStringLiteral() {
if (!isTokenStringLiteral()) {
Diag(Tok, diag::err_expected_string_literal)
<< /*Source='in...'*/0 << "'asm'";
return ExprError();
}
ExprResult AsmString(ParseStringLiteralExpression());
if (!AsmString.isInvalid()) {
const auto *SL = cast<StringLiteral>(AsmString.get());
if (!SL->isAscii()) {
Diag(Tok, diag::err_asm_operand_wide_string_literal)
<< SL->isWide()
<< SL->getSourceRange();
return ExprError();
}
}
return AsmString;
}
/// ParseSimpleAsm
///
/// [GNU] simple-asm-expr:
/// 'asm' '(' asm-string-literal ')'
///
ExprResult Parser::ParseSimpleAsm(SourceLocation *EndLoc) {
assert(Tok.is(tok::kw_asm) && "Not an asm!");
SourceLocation Loc = ConsumeToken();
if (Tok.is(tok::kw_volatile)) {
// Remove from the end of 'asm' to the end of 'volatile'.
SourceRange RemovalRange(PP.getLocForEndOfToken(Loc),
PP.getLocForEndOfToken(Tok.getLocation()));
Diag(Tok, diag::warn_file_asm_volatile)
<< FixItHint::CreateRemoval(RemovalRange);
ConsumeToken();
}
BalancedDelimiterTracker T(*this, tok::l_paren);
if (T.consumeOpen()) {
Diag(Tok, diag::err_expected_lparen_after) << "asm";
return ExprError();
}
ExprResult Result(ParseAsmStringLiteral());
if (!Result.isInvalid()) {
// Close the paren and get the location of the end bracket
T.consumeClose();
if (EndLoc)
*EndLoc = T.getCloseLocation();
} else if (SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch)) {
if (EndLoc)
*EndLoc = Tok.getLocation();
ConsumeParen();
}
return Result;
}
/// \brief Get the TemplateIdAnnotation from the token and put it in the
/// cleanup pool so that it gets destroyed when parsing the current top level
/// declaration is finished.
TemplateIdAnnotation *Parser::takeTemplateIdAnnotation(const Token &tok) {
assert(tok.is(tok::annot_template_id) && "Expected template-id token");
TemplateIdAnnotation *
Id = static_cast<TemplateIdAnnotation *>(tok.getAnnotationValue());
return Id;
}
void Parser::AnnotateScopeToken(CXXScopeSpec &SS, bool IsNewAnnotation) {
// Push the current token back into the token stream (or revert it if it is
// cached) and use an annotation scope token for current token.
if (PP.isBacktrackEnabled())
PP.RevertCachedTokens(1);
else
PP.EnterToken(Tok);
Tok.setKind(tok::annot_cxxscope);
Tok.setAnnotationValue(Actions.SaveNestedNameSpecifierAnnotation(SS));
Tok.setAnnotationRange(SS.getRange());
// In case the tokens were cached, have Preprocessor replace them
// with the annotation token. We don't need to do this if we've
// just reverted back to a prior state.
if (IsNewAnnotation)
PP.AnnotateCachedTokens(Tok);
}
/// \brief Attempt to classify the name at the current token position. This may
/// form a type, scope or primary expression annotation, or replace the token
/// with a typo-corrected keyword. This is only appropriate when the current
/// name must refer to an entity which has already been declared.
///
/// \param IsAddressOfOperand Must be \c true if the name is preceded by an '&'
/// and might possibly have a dependent nested name specifier.
/// \param CCC Indicates how to perform typo-correction for this name. If NULL,
/// no typo correction will be performed.
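/// For example, while parsing "x = 1;" inside a function, 'x' may be
/// annotated as a primary expression; in "T *p;", 'T' may be annotated as a
/// type.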
Parser::AnnotatedNameKind
Parser::TryAnnotateName(bool IsAddressOfOperand,
std::unique_ptr<CorrectionCandidateCallback> CCC) {
assert(Tok.is(tok::identifier) || Tok.is(tok::annot_cxxscope));
const bool EnteringContext = false;
const bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus &&
ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return ANK_Error;
if (Tok.isNot(tok::identifier) || SS.isInvalid()) {
if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation))
return ANK_Error;
return ANK_Unresolved;
}
IdentifierInfo *Name = Tok.getIdentifierInfo();
SourceLocation NameLoc = Tok.getLocation();
// FIXME: Move the tentative declaration logic into ClassifyName so we can
// typo-correct to tentatively-declared identifiers.
if (isTentativelyDeclared(Name)) {
// Identifier has been tentatively declared, and thus cannot be resolved as
// an expression. Fall back to annotating it as a type.
if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation))
return ANK_Error;
return Tok.is(tok::annot_typename) ? ANK_Success : ANK_TentativeDecl;
}
Token Next = NextToken();
// Look up and classify the identifier. We don't perform any typo-correction
// after a scope specifier, because in general we can't recover from typos
// there (eg, after correcting 'A::tempalte B<X>::C' [sic], we would need to
// jump back into scope specifier parsing).
Sema::NameClassification Classification = Actions.ClassifyName(
getCurScope(), SS, Name, NameLoc, Next, IsAddressOfOperand,
SS.isEmpty() ? std::move(CCC) : nullptr);
switch (Classification.getKind()) {
case Sema::NC_Error:
return ANK_Error;
case Sema::NC_Keyword:
// The identifier was typo-corrected to a keyword.
Tok.setIdentifierInfo(Name);
Tok.setKind(Name->getTokenID());
PP.TypoCorrectToken(Tok);
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
// We've "annotated" this as a keyword.
return ANK_Success;
case Sema::NC_Unknown:
// It's not something we know about. Leave it unannotated.
break;
case Sema::NC_Type: {
SourceLocation BeginLoc = NameLoc;
if (SS.isNotEmpty())
BeginLoc = SS.getBeginLoc();
// An Objective-C object type followed by '<' is a specialization of
// a parameterized class type or a protocol-qualified type.
ParsedType Ty = Classification.getType();
if (getLangOpts().ObjC1 && NextToken().is(tok::less) &&
(Ty.get()->isObjCObjectType() ||
Ty.get()->isObjCObjectPointerType())) {
// Consume the name.
SourceLocation IdentifierLoc = ConsumeToken();
SourceLocation NewEndLoc;
TypeResult NewType
= parseObjCTypeArgsAndProtocolQualifiers(IdentifierLoc, Ty,
/*consumeLastToken=*/false,
NewEndLoc);
if (NewType.isUsable())
Ty = NewType.get();
else if (Tok.is(tok::eof)) // Nothing to do here, bail out...
return ANK_Error;
}
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty);
Tok.setAnnotationEndLoc(Tok.getLocation());
Tok.setLocation(BeginLoc);
PP.AnnotateCachedTokens(Tok);
return ANK_Success;
}
case Sema::NC_Expression:
Tok.setKind(tok::annot_primary_expr);
setExprAnnotation(Tok, Classification.getExpression());
Tok.setAnnotationEndLoc(NameLoc);
if (SS.isNotEmpty())
Tok.setLocation(SS.getBeginLoc());
PP.AnnotateCachedTokens(Tok);
return ANK_Success;
case Sema::NC_TypeTemplate:
if (Next.isNot(tok::less)) {
// This may be a type template being used as a template template argument.
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
return ANK_TemplateName;
}
// Fall through.
case Sema::NC_VarTemplate:
case Sema::NC_FunctionTemplate: {
// We have a type, variable or function template followed by '<'.
ConsumeToken();
UnqualifiedId Id;
Id.setIdentifier(Name, NameLoc);
if (AnnotateTemplateIdToken(
TemplateTy::make(Classification.getTemplateName()),
Classification.getTemplateNameKind(), SS, SourceLocation(), Id))
return ANK_Error;
return ANK_Success;
}
case Sema::NC_NestedNameSpecifier:
llvm_unreachable("already parsed nested name specifier");
}
// Unable to classify the name, but maybe we can annotate a scope specifier.
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
return ANK_Unresolved;
}
bool Parser::TryKeywordIdentFallback(bool DisableKeyword) {
assert(Tok.isNot(tok::identifier));
Diag(Tok, diag::ext_keyword_as_ident)
<< PP.getSpelling(Tok)
<< DisableKeyword;
if (DisableKeyword)
Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
Tok.setKind(tok::identifier);
return true;
}
/// TryAnnotateTypeOrScopeToken - If the current token position is on a
/// typename (possibly qualified in C++) or a C++ scope specifier not followed
/// by a typename, TryAnnotateTypeOrScopeToken will replace one or more tokens
/// with a single annotation token representing the typename or C++ scope
/// respectively.
/// This simplifies handling of C++ scope specifiers and allows efficient
/// backtracking without the need to re-parse and resolve nested-names and
/// typenames.
/// It will mainly be called when we expect to treat identifiers as typenames
/// (if they are typenames). For example, in C we do not expect identifiers
/// inside expressions to be treated as typenames so it will not be called
/// for expressions in C.
/// The benefit for C/ObjC is that a typename will be annotated and
/// Actions.getTypeName will not need to be called again (e.g. getTypeName
/// will not be called twice, once to check whether we have a declaration
/// specifier, and again to get the actual type inside
/// ParseDeclarationSpecifiers).
///
/// This returns true if an error occurred.
///
/// Note that this routine emits an error if you call it with ::new or ::delete
/// as the current tokens, so only call it in contexts where these are invalid.
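/// For example, the tokens for 'std::vector<int>' in "std::vector<int> v;"
/// can be collapsed into a single annot_typename token, so backtracking does
/// not have to repeat the qualified name lookup.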
bool Parser::TryAnnotateTypeOrScopeToken() {
assert((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
Tok.is(tok::kw_typename) || Tok.is(tok::annot_cxxscope) ||
Tok.is(tok::kw_decltype) || Tok.is(tok::annot_template_id) ||
Tok.is(tok::kw___super)) &&
"Cannot be a type or scope token!");
if (Tok.is(tok::kw_typename)) {
// MSVC lets you do stuff like:
// typename typedef T_::D D;
//
// We will consume the typedef token here and put it back after we have
// parsed the first identifier, transforming it into something more like:
// typename T_::D typedef D;
if (getLangOpts().MSVCCompat && NextToken().is(tok::kw_typedef)) {
Token TypedefToken;
PP.Lex(TypedefToken);
bool Result = TryAnnotateTypeOrScopeToken();
PP.EnterToken(Tok);
Tok = TypedefToken;
if (!Result)
Diag(Tok.getLocation(), diag::warn_expected_qualified_after_typename);
return Result;
}
// Parse a C++ typename-specifier, e.g., "typename T::type".
//
// typename-specifier:
// 'typename' '::' [opt] nested-name-specifier identifier
// 'typename' '::' [opt] nested-name-specifier template [opt]
// simple-template-id
SourceLocation TypenameLoc = ConsumeToken();
CXXScopeSpec SS;
if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
/*EnteringContext=*/false, nullptr,
/*IsTypename*/ true))
return true;
if (!SS.isSet()) {
if (Tok.is(tok::identifier) || Tok.is(tok::annot_template_id) ||
Tok.is(tok::annot_decltype)) {
// Attempt to recover by skipping the invalid 'typename'
if (Tok.is(tok::annot_decltype) ||
(!TryAnnotateTypeOrScopeToken() && Tok.isAnnotation())) {
unsigned DiagID = diag::err_expected_qualified_after_typename;
// MS compatibility: MSVC permits using known types with typename.
// e.g. "typedef typename T* pointer_type"
if (getLangOpts().MicrosoftExt)
DiagID = diag::warn_expected_qualified_after_typename;
Diag(Tok.getLocation(), DiagID);
return false;
}
}
if (Tok.isEditorPlaceholder())
return true;
Diag(Tok.getLocation(), diag::err_expected_qualified_after_typename);
return true;
}
TypeResult Ty;
if (Tok.is(tok::identifier)) {
// FIXME: check whether the next token is '<', first!
Ty = Actions.ActOnTypenameType(getCurScope(), TypenameLoc, SS,
*Tok.getIdentifierInfo(),
Tok.getLocation());
} else if (Tok.is(tok::annot_template_id)) {
TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
if (TemplateId->Kind != TNK_Type_template &&
TemplateId->Kind != TNK_Dependent_template_name) {
Diag(Tok, diag::err_typename_refers_to_non_type_template)
<< Tok.getAnnotationRange();
return true;
}
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
Ty = Actions.ActOnTypenameType(getCurScope(), TypenameLoc, SS,
TemplateId->TemplateKWLoc,
TemplateId->Template,
TemplateId->Name,
TemplateId->TemplateNameLoc,
TemplateId->LAngleLoc,
TemplateArgsPtr,
TemplateId->RAngleLoc);
} else {
Diag(Tok, diag::err_expected_type_name_after_typename)
<< SS.getRange();
return true;
}
SourceLocation EndLoc = Tok.getLastLoc();
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty.isInvalid() ? nullptr : Ty.get());
Tok.setAnnotationEndLoc(EndLoc);
Tok.setLocation(TypenameLoc);
PP.AnnotateCachedTokens(Tok);
return false;
}
// Remembers whether the token was originally a scope annotation.
bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus)
if (ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext*/false))
return true;
return TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation);
}
/// \brief Try to annotate a type or scope token, having already parsed an
/// optional scope specifier. \p IsNewScope should be \c true unless the scope
/// specifier was extracted from an existing tok::annot_cxxscope annotation.
bool Parser::TryAnnotateTypeOrScopeTokenAfterScopeSpec(CXXScopeSpec &SS,
bool IsNewScope) {
if (Tok.is(tok::identifier)) {
// Determine whether the identifier is a type name.
if (ParsedType Ty = Actions.getTypeName(
*Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope(), &SS,
false, NextToken().is(tok::period), nullptr,
/*IsCtorOrDtorName=*/false,
/*NonTrivialTypeSourceInfo*/ true,
/*IsClassTemplateDeductionContext*/GreaterThanIsOperator)) {
SourceLocation BeginLoc = Tok.getLocation();
if (SS.isNotEmpty()) // it was a C++ qualified type name.
BeginLoc = SS.getBeginLoc();
// An Objective-C object type followed by '<' is a specialization of
// a parameterized class type or a protocol-qualified type.
if (getLangOpts().ObjC1 && NextToken().is(tok::less) &&
(Ty.get()->isObjCObjectType() ||
Ty.get()->isObjCObjectPointerType())) {
// Consume the name.
SourceLocation IdentifierLoc = ConsumeToken();
SourceLocation NewEndLoc;
TypeResult NewType
= parseObjCTypeArgsAndProtocolQualifiers(IdentifierLoc, Ty,
/*consumeLastToken=*/false,
NewEndLoc);
if (NewType.isUsable())
Ty = NewType.get();
else if (Tok.is(tok::eof)) // Nothing to do here, bail out...
return false;
}
// This is a typename. Replace the current token in-place with an
// annotation type token.
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty);
Tok.setAnnotationEndLoc(Tok.getLocation());
Tok.setLocation(BeginLoc);
// In case the tokens were cached, have Preprocessor replace
// them with the annotation token.
PP.AnnotateCachedTokens(Tok);
return false;
}
if (!getLangOpts().CPlusPlus) {
// If we're in C, we can't have :: tokens at all (the lexer won't return
// them). If the identifier is not a type, then it can't be a scope either;
// just exit early.
return false;
}
// If this is a template-id, annotate with a template-id or type token.
if (NextToken().is(tok::less)) {
TemplateTy Template;
UnqualifiedId TemplateName;
TemplateName.setIdentifier(Tok.getIdentifierInfo(), Tok.getLocation());
bool MemberOfUnknownSpecialization;
if (TemplateNameKind TNK = Actions.isTemplateName(
getCurScope(), SS,
/*hasTemplateKeyword=*/false, TemplateName,
/*ObjectType=*/nullptr, /*EnteringContext*/false, Template,
MemberOfUnknownSpecialization)) {
// Consume the identifier.
ConsumeToken();
if (AnnotateTemplateIdToken(Template, TNK, SS, SourceLocation(),
TemplateName)) {
// If an unrecoverable error occurred, we need to return true here,
// because the token stream is in a damaged state. We may not return
// a valid identifier.
return true;
}
}
}
// The current token, which is either an identifier or a
// template-id, is not part of the annotation. Fall through to
// push that token back into the stream and complete the C++ scope
// specifier annotation.
}
if (Tok.is(tok::annot_template_id)) {
TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
if (TemplateId->Kind == TNK_Type_template) {
// A template-id that refers to a type was parsed into a
// template-id annotation in a context where we weren't allowed
// to produce a type annotation token. Update the template-id
// annotation token to a type annotation token now.
AnnotateTemplateIdTokenAsType();
return false;
}
}
if (SS.isEmpty())
return false;
// A C++ scope specifier that isn't followed by a typename.
AnnotateScopeToken(SS, IsNewScope);
return false;
}
/// TryAnnotateCXXScopeToken - Like TryAnnotateTypeOrScopeToken but only
/// annotates C++ scope specifiers and template-ids. This returns
/// true if there was an error that could not be recovered from.
///
/// Note that this routine emits an error if you call it with ::new or ::delete
/// as the current tokens, so only call it in contexts where these are invalid.
bool Parser::TryAnnotateCXXScopeToken(bool EnteringContext) {
assert(getLangOpts().CPlusPlus &&
"Call sites of this function should be guarded by checking for C++");
assert((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
(Tok.is(tok::annot_template_id) && NextToken().is(tok::coloncolon)) ||
Tok.is(tok::kw_decltype) || Tok.is(tok::kw___super)) &&
"Cannot be a type or scope token!");
CXXScopeSpec SS;
if (ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return true;
if (SS.isEmpty())
return false;
AnnotateScopeToken(SS, true);
return false;
}
bool Parser::isTokenEqualOrEqualTypo() {
tok::TokenKind Kind = Tok.getKind();
switch (Kind) {
default:
return false;
case tok::ampequal: // &=
case tok::starequal: // *=
case tok::plusequal: // +=
case tok::minusequal: // -=
case tok::exclaimequal: // !=
case tok::slashequal: // /=
case tok::percentequal: // %=
case tok::lessequal: // <=
case tok::lesslessequal: // <<=
case tok::greaterequal: // >=
case tok::greatergreaterequal: // >>=
case tok::caretequal: // ^=
case tok::pipeequal: // |=
case tok::equalequal: // ==
Diag(Tok, diag::err_invalid_token_after_declarator_suggest_equal)
<< Kind
<< FixItHint::CreateReplacement(SourceRange(Tok.getLocation()), "=");
LLVM_FALLTHROUGH;
case tok::equal:
return true;
}
}
SourceLocation Parser::handleUnexpectedCodeCompletionToken() {
assert(Tok.is(tok::code_completion));
PrevTokLocation = Tok.getLocation();
for (Scope *S = getCurScope(); S; S = S->getParent()) {
if (S->getFlags() & Scope::FnScope) {
Actions.CodeCompleteOrdinaryName(getCurScope(),
Sema::PCC_RecoveryInFunction);
cutOffParsing();
return PrevTokLocation;
}
if (S->getFlags() & Scope::ClassScope) {
Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Class);
cutOffParsing();
return PrevTokLocation;
}
}
Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Namespace);
cutOffParsing();
return PrevTokLocation;
}
// Code-completion pass-through functions
void Parser::CodeCompleteDirective(bool InConditional) {
Actions.CodeCompletePreprocessorDirective(InConditional);
}
void Parser::CodeCompleteInConditionalExclusion() {
Actions.CodeCompleteInPreprocessorConditionalExclusion(getCurScope());
}
void Parser::CodeCompleteMacroName(bool IsDefinition) {
Actions.CodeCompletePreprocessorMacroName(IsDefinition);
}
void Parser::CodeCompletePreprocessorExpression() {
Actions.CodeCompletePreprocessorExpression();
}
void Parser::CodeCompleteMacroArgument(IdentifierInfo *Macro,
MacroInfo *MacroInfo,
unsigned ArgumentIndex) {
Actions.CodeCompletePreprocessorMacroArgument(getCurScope(), Macro, MacroInfo,
ArgumentIndex);
}
void Parser::CodeCompleteNaturalLanguage() {
Actions.CodeCompleteNaturalLanguage();
}
bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) {
assert((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists)) &&
"Expected '__if_exists' or '__if_not_exists'");
Result.IsIfExists = Tok.is(tok::kw___if_exists);
Result.KeywordLoc = ConsumeToken();
BalancedDelimiterTracker T(*this, tok::l_paren);
if (T.consumeOpen()) {
Diag(Tok, diag::err_expected_lparen_after)
<< (Result.IsIfExists? "__if_exists" : "__if_not_exists");
return true;
}
// Parse nested-name-specifier.
if (getLangOpts().CPlusPlus)
ParseOptionalCXXScopeSpecifier(Result.SS, nullptr,
/*EnteringContext=*/false);
// Check nested-name specifier.
if (Result.SS.isInvalid()) {
T.skipToEnd();
return true;
}
// Parse the unqualified-id.
SourceLocation TemplateKWLoc; // FIXME: parsed, but unused.
if (ParseUnqualifiedId(
Result.SS, /*EnteringContext*/false, /*AllowDestructorName*/true,
/*AllowConstructorName*/true, /*AllowDeductionGuide*/false, nullptr,
TemplateKWLoc, Result.Name)) {
T.skipToEnd();
return true;
}
if (T.consumeClose())
return true;
// Check if the symbol exists.
switch (Actions.CheckMicrosoftIfExistsSymbol(getCurScope(), Result.KeywordLoc,
Result.IsIfExists, Result.SS,
Result.Name)) {
case Sema::IER_Exists:
Result.Behavior = Result.IsIfExists ? IEB_Parse : IEB_Skip;
break;
case Sema::IER_DoesNotExist:
Result.Behavior = !Result.IsIfExists ? IEB_Parse : IEB_Skip;
break;
case Sema::IER_Dependent:
Result.Behavior = IEB_Dependent;
break;
case Sema::IER_Error:
return true;
}
return false;
}
void Parser::ParseMicrosoftIfExistsExternalDeclaration() {
IfExistsCondition Result;
if (ParseMicrosoftIfExistsCondition(Result))
return;
BalancedDelimiterTracker Braces(*this, tok::l_brace);
if (Braces.consumeOpen()) {
Diag(Tok, diag::err_expected) << tok::l_brace;
return;
}
switch (Result.Behavior) {
case IEB_Parse:
// Parse declarations below.
break;
case IEB_Dependent:
llvm_unreachable("Cannot have a dependent external declaration");
case IEB_Skip:
Braces.skipToEnd();
return;
}
// Parse the declarations.
// FIXME: Support module import within __if_exists?
while (Tok.isNot(tok::r_brace) && !isEofOrEom()) {
ParsedAttributesWithRange attrs(AttrFactory);
MaybeParseCXX11Attributes(attrs);
DeclGroupPtrTy Result = ParseExternalDeclaration(attrs);
if (Result && !getCurScope()->getParent())
Actions.getASTConsumer().HandleTopLevelDecl(Result.get());
}
Braces.consumeClose();
}
/// Parse a C++ Modules TS module declaration, which appears at the beginning
/// of a module interface, module partition, or module implementation file.
///
/// module-declaration: [Modules TS + P0273R0 + P0629R0]
/// 'export'[opt] 'module' 'partition'[opt]
/// module-name attribute-specifier-seq[opt] ';'
///
/// Note that 'partition' is a context-sensitive keyword.
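/// For example:
/// export module foo.bar; // module interface unit
/// module foo.bar; // module implementation unit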
Parser::DeclGroupPtrTy Parser::ParseModuleDecl() {
SourceLocation StartLoc = Tok.getLocation();
Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export)
? Sema::ModuleDeclKind::Module
: Sema::ModuleDeclKind::Implementation;
assert(Tok.is(tok::kw_module) && "not a module declaration");
SourceLocation ModuleLoc = ConsumeToken();
if (Tok.is(tok::identifier) && NextToken().is(tok::identifier) &&
Tok.getIdentifierInfo()->isStr("partition")) {
// If 'partition' is present, this must be a module interface unit.
if (MDK != Sema::ModuleDeclKind::Module)
Diag(Tok.getLocation(), diag::err_module_implementation_partition)
<< FixItHint::CreateInsertion(ModuleLoc, "export ");
MDK = Sema::ModuleDeclKind::Partition;
ConsumeToken();
}
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
if (ParseModuleName(ModuleLoc, Path, /*IsImport*/false))
return nullptr;
// We don't support any module attributes yet; just parse them and diagnose.
ParsedAttributesWithRange Attrs(AttrFactory);
MaybeParseCXX11Attributes(Attrs);
ProhibitCXX11Attributes(Attrs, diag::err_attribute_not_module_attr);
ExpectAndConsumeSemi(diag::err_module_expected_semi);
return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path);
}
/// Parse a module import declaration. This is essentially the same for
/// Objective-C and the C++ Modules TS, except for the leading '@' (in ObjC)
/// and the trailing optional attributes (in C++).
///
/// [ObjC] @import declaration:
/// '@' 'import' module-name ';'
/// [ModTS] module-import-declaration:
/// 'import' module-name attribute-specifier-seq[opt] ';'
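/// For example: "@import Foundation;" in Objective-C, or
/// "import std.core;" under the Modules TS.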
Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) {
assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import)
: Tok.isObjCAtKeyword(tok::objc_import)) &&
"Improper start to module import");
SourceLocation ImportLoc = ConsumeToken();
SourceLocation StartLoc = AtLoc.isInvalid() ? ImportLoc : AtLoc;
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
if (ParseModuleName(ImportLoc, Path, /*IsImport*/true))
return nullptr;
ParsedAttributesWithRange Attrs(AttrFactory);
MaybeParseCXX11Attributes(Attrs);
// We don't support any module import attributes yet.
ProhibitCXX11Attributes(Attrs, diag::err_attribute_not_import_attr);
if (PP.hadModuleLoaderFatalFailure()) {
// With a fatal failure in the module loader, we abort parsing.
cutOffParsing();
return nullptr;
}
DeclResult Import = Actions.ActOnModuleImport(StartLoc, ImportLoc, Path);
ExpectAndConsumeSemi(diag::err_module_expected_semi);
if (Import.isInvalid())
return nullptr;
return Actions.ConvertDeclToDeclGroup(Import.get());
}
/// Parse a C++ Modules TS / Objective-C module name (both forms use the same
/// grammar).
///
/// module-name:
/// module-name-qualifier[opt] identifier
/// module-name-qualifier:
/// module-name-qualifier[opt] identifier '.'
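/// For example, "foo.bar.baz" is recorded as the path {foo, bar, baz}.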
bool Parser::ParseModuleName(
SourceLocation UseLoc,
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
bool IsImport) {
// Parse the module path.
while (true) {
if (!Tok.is(tok::identifier)) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteModuleImport(UseLoc, Path);
cutOffParsing();
return true;
}
Diag(Tok, diag::err_module_expected_ident) << IsImport;
SkipUntil(tok::semi);
return true;
}
// Record this part of the module path.
Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
ConsumeToken();
if (Tok.isNot(tok::period))
return false;
ConsumeToken();
}
}
/// \brief Try to recover the parser when a module annotation appears where it
/// must not appear.
/// \returns false if the recovery was successful and parsing may continue, or
/// true if the parser must bail out to the top level and handle the token
/// there.
bool Parser::parseMisplacedModuleImport() {
while (true) {
switch (Tok.getKind()) {
case tok::annot_module_end:
// If we recovered from a misplaced module begin, we expect to hit a
// misplaced module end too. Stay in the current context when this
// happens.
if (MisplacedModuleBeginCount) {
--MisplacedModuleBeginCount;
Actions.ActOnModuleEnd(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
continue;
}
// Inform the caller that recovery failed; the error must be handled at an
// upper level. This will generate the desired "missing '}' at end of module"
// diagnostics on the way out.
return true;
case tok::annot_module_begin:
// Recover by entering the module (Sema will diagnose).
Actions.ActOnModuleBegin(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
++MisplacedModuleBeginCount;
continue;
case tok::annot_module_include:
// Module import found where it should not be, for instance, inside a
// namespace. Recover by importing the module.
Actions.ActOnModuleInclude(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
// If there is another module import, process it.
continue;
default:
return false;
}
}
return false;
}
bool BalancedDelimiterTracker::diagnoseOverflow() {
P.Diag(P.Tok, diag::err_bracket_depth_exceeded)
<< P.getLangOpts().BracketDepth;
P.Diag(P.Tok, diag::note_bracket_depth);
P.cutOffParsing();
return true;
}
bool BalancedDelimiterTracker::expectAndConsume(unsigned DiagID,
const char *Msg,
tok::TokenKind SkipToTok) {
LOpen = P.Tok.getLocation();
if (P.ExpectAndConsume(Kind, DiagID, Msg)) {
if (SkipToTok != tok::unknown)
P.SkipUntil(SkipToTok, Parser::StopAtSemi);
return true;
}
if (getDepth() < MaxDepth)
return false;
return diagnoseOverflow();
}
bool BalancedDelimiterTracker::diagnoseMissingClose() {
assert(!P.Tok.is(Close) && "Should have consumed closing delimiter");
if (P.Tok.is(tok::annot_module_end))
P.Diag(P.Tok, diag::err_missing_before_module_end) << Close;
else
P.Diag(P.Tok, diag::err_expected) << Close;
P.Diag(LOpen, diag::note_matching) << Kind;
// If we're not already at some kind of closing bracket, skip to our closing
// token.
if (P.Tok.isNot(tok::r_paren) && P.Tok.isNot(tok::r_brace) &&
P.Tok.isNot(tok::r_square) &&
P.SkipUntil(Close, FinalToken,
Parser::StopAtSemi | Parser::StopBeforeMatch) &&
P.Tok.is(Close))
LClose = P.ConsumeAnyToken();
return true;
}
void BalancedDelimiterTracker::skipToEnd() {
P.SkipUntil(Close, Parser::StopBeforeMatch);
consumeClose();
}
Index: head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp (revision 322855)
@@ -1,14920 +1,14970 @@
//===------ SemaDeclCXX.cpp - Semantic Analysis for C++ Declarations ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for C++ declarations.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/CXXFieldCollector.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include <map>
#include <set>
using namespace clang;
//===----------------------------------------------------------------------===//
// CheckDefaultArgumentVisitor
//===----------------------------------------------------------------------===//
namespace {
/// CheckDefaultArgumentVisitor - C++ [dcl.fct.default] Traverses
/// the default argument of a parameter to determine whether it
/// contains any ill-formed subexpressions. For example, this will
/// diagnose the use of local variables or parameters within the
/// default argument expression.
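/// For example, both of these are rejected:
/// int f(int a, int b = a); // refers to the parameter 'a'
/// void g() { int n; extern void h(int = n); } // refers to the local 'n'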
class CheckDefaultArgumentVisitor
: public StmtVisitor<CheckDefaultArgumentVisitor, bool> {
Expr *DefaultArg;
Sema *S;
public:
CheckDefaultArgumentVisitor(Expr *defarg, Sema *s)
: DefaultArg(defarg), S(s) {}
bool VisitExpr(Expr *Node);
bool VisitDeclRefExpr(DeclRefExpr *DRE);
bool VisitCXXThisExpr(CXXThisExpr *ThisE);
bool VisitLambdaExpr(LambdaExpr *Lambda);
bool VisitPseudoObjectExpr(PseudoObjectExpr *POE);
};
/// VisitExpr - Visit all of the children of this expression.
bool CheckDefaultArgumentVisitor::VisitExpr(Expr *Node) {
bool IsInvalid = false;
for (Stmt *SubStmt : Node->children())
IsInvalid |= Visit(SubStmt);
return IsInvalid;
}
/// VisitDeclRefExpr - Visit a reference to a declaration, to
/// determine whether this declaration can be used in the default
/// argument expression.
bool CheckDefaultArgumentVisitor::VisitDeclRefExpr(DeclRefExpr *DRE) {
NamedDecl *Decl = DRE->getDecl();
if (ParmVarDecl *Param = dyn_cast<ParmVarDecl>(Decl)) {
// C++ [dcl.fct.default]p9
// Default arguments are evaluated each time the function is
// called. The order of evaluation of function arguments is
// unspecified. Consequently, parameters of a function shall not
// be used in default argument expressions, even if they are not
// evaluated. Parameters of a function declared before a default
// argument expression are in scope and can hide namespace and
// class member names.
return S->Diag(DRE->getLocStart(),
diag::err_param_default_argument_references_param)
<< Param->getDeclName() << DefaultArg->getSourceRange();
} else if (VarDecl *VDecl = dyn_cast<VarDecl>(Decl)) {
// C++ [dcl.fct.default]p7
// Local variables shall not be used in default argument
// expressions.
if (VDecl->isLocalVarDecl())
return S->Diag(DRE->getLocStart(),
diag::err_param_default_argument_references_local)
<< VDecl->getDeclName() << DefaultArg->getSourceRange();
}
return false;
}
/// VisitCXXThisExpr - Visit a C++ "this" expression.
bool CheckDefaultArgumentVisitor::VisitCXXThisExpr(CXXThisExpr *ThisE) {
// C++ [dcl.fct.default]p8:
// The keyword this shall not be used in a default argument of a
// member function.
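// For example: struct S { int x; void f(int i = this->x); }; // ill-formed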
return S->Diag(ThisE->getLocStart(),
diag::err_param_default_argument_references_this)
<< ThisE->getSourceRange();
}
bool CheckDefaultArgumentVisitor::VisitPseudoObjectExpr(PseudoObjectExpr *POE) {
bool Invalid = false;
for (PseudoObjectExpr::semantics_iterator
i = POE->semantics_begin(), e = POE->semantics_end(); i != e; ++i) {
Expr *E = *i;
// Look through bindings.
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
E = OVE->getSourceExpr();
assert(E && "pseudo-object binding without source expression?");
}
Invalid |= Visit(E);
}
return Invalid;
}
bool CheckDefaultArgumentVisitor::VisitLambdaExpr(LambdaExpr *Lambda) {
// C++11 [expr.lambda.prim]p13:
// A lambda-expression appearing in a default argument shall not
// implicitly or explicitly capture any entity.
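// For example: void f(int i, int j = [i]{ return i; }()); // ill-formed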
if (Lambda->capture_begin() == Lambda->capture_end())
return false;
return S->Diag(Lambda->getLocStart(),
diag::err_lambda_capture_default_arg);
}
}
void
Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc,
const CXXMethodDecl *Method) {
// If we have an MSAny spec already, don't bother.
if (!Method || ComputedEST == EST_MSAny)
return;
const FunctionProtoType *Proto
= Method->getType()->getAs<FunctionProtoType>();
Proto = Self->ResolveExceptionSpec(CallLoc, Proto);
if (!Proto)
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
// If we have a throw-all spec at this point, ignore the function.
if (ComputedEST == EST_None)
return;
switch(EST) {
// If this function can throw any exceptions, make a note of that.
case EST_MSAny:
case EST_None:
ClearExceptions();
ComputedEST = EST;
return;
// FIXME: If the call to this decl is using any of its default arguments, we
// need to search them for potentially-throwing calls.
// If this function has a basic noexcept, it doesn't affect the outcome.
case EST_BasicNoexcept:
return;
// If we're still at noexcept(true) and there's a nothrow() callee,
// change to that specification.
case EST_DynamicNone:
if (ComputedEST == EST_BasicNoexcept)
ComputedEST = EST_DynamicNone;
return;
// Check out noexcept specs.
case EST_ComputedNoexcept:
{
FunctionProtoType::NoexceptResult NR =
Proto->getNoexceptSpec(Self->Context);
assert(NR != FunctionProtoType::NR_NoNoexcept &&
"Must have noexcept result for EST_ComputedNoexcept.");
assert(NR != FunctionProtoType::NR_Dependent &&
"Should not generate implicit declarations for dependent cases, "
"and don't know how to handle them anyway.");
// noexcept(false) -> no spec on the new function
if (NR == FunctionProtoType::NR_Throw) {
ClearExceptions();
ComputedEST = EST_None;
}
// noexcept(true) won't change anything either.
return;
}
default:
break;
}
assert(EST == EST_Dynamic && "EST case not considered earlier.");
assert(ComputedEST != EST_None &&
"Shouldn't collect exceptions when throw-all is guaranteed.");
ComputedEST = EST_Dynamic;
// Record the exceptions in this function's exception specification.
for (const auto &E : Proto->exceptions())
if (ExceptionsSeen.insert(Self->Context.getCanonicalType(E)).second)
Exceptions.push_back(E);
}
void Sema::ImplicitExceptionSpecification::CalledExpr(Expr *E) {
if (!E || ComputedEST == EST_MSAny)
return;
// FIXME:
//
// C++0x [except.spec]p14:
// [An] implicit exception-specification specifies the type-id T if and
// only if T is allowed by the exception-specification of a function directly
// invoked by f's implicit definition; f shall allow all exceptions if any
// function it directly invokes allows all exceptions, and f shall allow no
// exceptions if every function it directly invokes allows no exceptions.
//
// Note in particular that if an implicit exception-specification is generated
// for a function containing a throw-expression, that specification can still
// be noexcept(true).
//
// Note also that 'directly invoked' is not defined in the standard, and there
// is no indication that we should only consider potentially-evaluated calls.
//
// Ultimately we should implement the intent of the standard: the exception
// specification should be the set of exceptions which can be thrown by the
// implicit definition. For now, we assume that any non-nothrow expression can
// throw any exception.
if (Self->canThrow(E))
ComputedEST = EST_None;
}
bool
Sema::SetParamDefaultArgument(ParmVarDecl *Param, Expr *Arg,
SourceLocation EqualLoc) {
if (RequireCompleteType(Param->getLocation(), Param->getType(),
diag::err_typecheck_decl_incomplete_type)) {
Param->setInvalidDecl();
return true;
}
// C++ [dcl.fct.default]p5
// A default argument expression is implicitly converted (clause
// 4) to the parameter type. The default argument expression has
// the same semantic constraints as the initializer expression in
// a declaration of a variable of the parameter type, using the
// copy-initialization semantics (8.5).
InitializedEntity Entity = InitializedEntity::InitializeParameter(Context,
Param);
InitializationKind Kind = InitializationKind::CreateCopy(Param->getLocation(),
EqualLoc);
InitializationSequence InitSeq(*this, Entity, Kind, Arg);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Arg);
if (Result.isInvalid())
return true;
Arg = Result.getAs<Expr>();
CheckCompletedExpr(Arg, EqualLoc);
Arg = MaybeCreateExprWithCleanups(Arg);
// Okay: add the default argument to the parameter
Param->setDefaultArg(Arg);
// We have already instantiated this parameter; provide each of the
// instantiations with the uninstantiated default argument.
UnparsedDefaultArgInstantiationsMap::iterator InstPos
= UnparsedDefaultArgInstantiations.find(Param);
if (InstPos != UnparsedDefaultArgInstantiations.end()) {
for (unsigned I = 0, N = InstPos->second.size(); I != N; ++I)
InstPos->second[I]->setUninstantiatedDefaultArg(Arg);
// We're done tracking this parameter's instantiations.
UnparsedDefaultArgInstantiations.erase(InstPos);
}
return false;
}
/// ActOnParamDefaultArgument - Check whether the default argument
/// provided for a function parameter is well-formed. If so, attach it
/// to the parameter declaration.
void
Sema::ActOnParamDefaultArgument(Decl *param, SourceLocation EqualLoc,
Expr *DefaultArg) {
if (!param || !DefaultArg)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
UnparsedDefaultArgLocs.erase(Param);
// Default arguments are only permitted in C++
if (!getLangOpts().CPlusPlus) {
Diag(EqualLoc, diag::err_param_default_argument)
<< DefaultArg->getSourceRange();
Param->setInvalidDecl();
return;
}
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(DefaultArg, UPPC_DefaultArgument)) {
Param->setInvalidDecl();
return;
}
// C++11 [dcl.fct.default]p3
// A default argument expression [...] shall not be specified for a
// parameter pack.
if (Param->isParameterPack()) {
Diag(EqualLoc, diag::err_param_default_argument_on_parameter_pack)
<< DefaultArg->getSourceRange();
return;
}
// Check that the default argument is well-formed
CheckDefaultArgumentVisitor DefaultArgChecker(DefaultArg, this);
if (DefaultArgChecker.Visit(DefaultArg)) {
Param->setInvalidDecl();
return;
}
SetParamDefaultArgument(Param, DefaultArg, EqualLoc);
}
/// ActOnParamUnparsedDefaultArgument - We've seen a default
/// argument for a function parameter, but we can't parse it yet
/// because we're inside a class definition. Note that this default
/// argument will be parsed later.
void Sema::ActOnParamUnparsedDefaultArgument(Decl *param,
SourceLocation EqualLoc,
SourceLocation ArgLoc) {
if (!param)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
Param->setUnparsedDefaultArg();
UnparsedDefaultArgLocs[Param] = ArgLoc;
}
/// ActOnParamDefaultArgumentError - Parsing or semantic analysis of
/// the default argument for the parameter param failed.
void Sema::ActOnParamDefaultArgumentError(Decl *param,
SourceLocation EqualLoc) {
if (!param)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
Param->setInvalidDecl();
UnparsedDefaultArgLocs.erase(Param);
Param->setDefaultArg(new(Context)
OpaqueValueExpr(EqualLoc,
Param->getType().getNonReferenceType(),
VK_RValue));
}
/// CheckExtraCXXDefaultArguments - Check for any extra default
/// arguments in the declarator, which is not a function declaration
/// or definition and therefore is not permitted to have default
/// arguments. This routine should be invoked for every declarator
/// that is not a function declaration or definition.
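/// For example, the default argument here is diagnosed and dropped:
/// int (*pf)(int = 7); // 'pf' declares a variable, not a function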
void Sema::CheckExtraCXXDefaultArguments(Declarator &D) {
// C++ [dcl.fct.default]p3
// A default argument expression shall be specified only in the
// parameter-declaration-clause of a function declaration or in a
// template-parameter (14.1). It shall not be specified for a
// parameter pack. If it is specified in a
// parameter-declaration-clause, it shall not occur within a
// declarator or abstract-declarator of a parameter-declaration.
bool MightBeFunction = D.isFunctionDeclarationContext();
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
if (chunk.Kind == DeclaratorChunk::Function) {
if (MightBeFunction) {
// This is a function declaration. It can have default arguments, but
// keep looking in case its return type is a function type with default
// arguments.
MightBeFunction = false;
continue;
}
for (unsigned argIdx = 0, e = chunk.Fun.NumParams; argIdx != e;
++argIdx) {
ParmVarDecl *Param = cast<ParmVarDecl>(chunk.Fun.Params[argIdx].Param);
if (Param->hasUnparsedDefaultArg()) {
std::unique_ptr<CachedTokens> Toks =
std::move(chunk.Fun.Params[argIdx].DefaultArgTokens);
SourceRange SR;
if (Toks->size() > 1)
SR = SourceRange((*Toks)[1].getLocation(),
Toks->back().getLocation());
else
SR = UnparsedDefaultArgLocs[Param];
Diag(Param->getLocation(), diag::err_param_default_argument_nonfunc)
<< SR;
} else if (Param->getDefaultArg()) {
Diag(Param->getLocation(), diag::err_param_default_argument_nonfunc)
<< Param->getDefaultArg()->getSourceRange();
Param->setDefaultArg(nullptr);
}
}
} else if (chunk.Kind != DeclaratorChunk::Paren) {
MightBeFunction = false;
}
}
}
static bool functionDeclHasDefaultArgument(const FunctionDecl *FD) {
for (unsigned NumParams = FD->getNumParams(); NumParams > 0; --NumParams) {
const ParmVarDecl *PVD = FD->getParamDecl(NumParams-1);
if (!PVD->hasDefaultArg())
return false;
if (!PVD->hasInheritedDefaultArg())
return true;
}
return false;
}
/// MergeCXXFunctionDecl - Merge two declarations of the same C++
/// function, once we already know that they have the same
/// type. Subroutine of MergeFunctionDecl. Returns true if there was an
/// error, false otherwise.
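/// For example, default arguments accumulate across redeclarations in the
/// same scope:
/// void f(int, int = 2);
/// void f(int = 1, int); // OK, f() is now callable with no arguments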
bool Sema::MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old,
Scope *S) {
bool Invalid = false;
// The declaration context corresponding to the scope is the semantic
// parent, unless this is a local function declaration, in which case
// it is that surrounding function.
DeclContext *ScopeDC = New->isLocalExternDecl()
? New->getLexicalDeclContext()
: New->getDeclContext();
// Find the previous declaration for the purpose of default arguments.
FunctionDecl *PrevForDefaultArgs = Old;
for (/**/; PrevForDefaultArgs;
// Don't bother looking back past the latest decl if this is a local
// extern declaration; nothing else could work.
PrevForDefaultArgs = New->isLocalExternDecl()
? nullptr
: PrevForDefaultArgs->getPreviousDecl()) {
// Ignore hidden declarations.
if (!LookupResult::isVisible(*this, PrevForDefaultArgs))
continue;
if (S && !isDeclInScope(PrevForDefaultArgs, ScopeDC, S) &&
!New->isCXXClassMember()) {
// Ignore default arguments of old decl if they are not in
// the same scope and this is not an out-of-line definition of
// a member function.
continue;
}
if (PrevForDefaultArgs->isLocalExternDecl() != New->isLocalExternDecl()) {
// If only one of these is a local function declaration, then they are
// declared in different scopes, even though isDeclInScope may think
// they're in the same scope. (If both are local, the scope check is
// sufficient, and if neither is local, then they are in the same scope.)
continue;
}
// We found the right previous declaration.
break;
}
// C++ [dcl.fct.default]p4:
// For non-template functions, default arguments can be added in
// later declarations of a function in the same
// scope. Declarations in different scopes have completely
// distinct sets of default arguments. That is, declarations in
// inner scopes do not acquire default arguments from
// declarations in outer scopes, and vice versa. In a given
// function declaration, all parameters subsequent to a
// parameter with a default argument shall have default
// arguments supplied in this or previous declarations. A
// default argument shall not be redefined by a later
// declaration (not even to the same value).
//
// C++ [dcl.fct.default]p6:
// Except for member functions of class templates, the default arguments
// in a member function definition that appears outside of the class
// definition are added to the set of default arguments provided by the
// member function declaration in the class definition.
for (unsigned p = 0, NumParams = PrevForDefaultArgs
? PrevForDefaultArgs->getNumParams()
: 0;
p < NumParams; ++p) {
ParmVarDecl *OldParam = PrevForDefaultArgs->getParamDecl(p);
ParmVarDecl *NewParam = New->getParamDecl(p);
bool OldParamHasDfl = OldParam ? OldParam->hasDefaultArg() : false;
bool NewParamHasDfl = NewParam->hasDefaultArg();
if (OldParamHasDfl && NewParamHasDfl) {
unsigned DiagDefaultParamID =
diag::err_param_default_argument_redefinition;
// MSVC allows default parameters to be redefined for member functions
// of class templates. The new default parameter's value is ignored.
Invalid = true;
if (getLangOpts().MicrosoftExt) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(New);
if (MD && MD->getParent()->getDescribedClassTemplate()) {
// Merge the old default argument into the new parameter.
NewParam->setHasInheritedDefaultArg();
if (OldParam->hasUninstantiatedDefaultArg())
NewParam->setUninstantiatedDefaultArg(
OldParam->getUninstantiatedDefaultArg());
else
NewParam->setDefaultArg(OldParam->getInit());
DiagDefaultParamID = diag::ext_param_default_argument_redefinition;
Invalid = false;
}
}
// FIXME: If we knew where the '=' was, we could easily provide a fix-it
// hint here. Alternatively, we could walk the type-source information
// for NewParam to find the last source location in the type... but it
// isn't worth the effort right now. This is the kind of test case that
// is hard to get right:
// int f(int);
// void g(int (*fp)(int) = f);
// void g(int (*fp)(int) = &f);
Diag(NewParam->getLocation(), DiagDefaultParamID)
<< NewParam->getDefaultArgRange();
// Look for the function declaration where the default argument was
// actually written, which may be a declaration prior to Old.
for (auto Older = PrevForDefaultArgs;
OldParam->hasInheritedDefaultArg(); /**/) {
Older = Older->getPreviousDecl();
OldParam = Older->getParamDecl(p);
}
Diag(OldParam->getLocation(), diag::note_previous_definition)
<< OldParam->getDefaultArgRange();
} else if (OldParamHasDfl) {
// Merge the old default argument into the new parameter unless the new
// function is a friend declaration in a class template. In the latter
// case the default arguments will be inherited when the friend
// declaration is instantiated.
if (New->getFriendObjectKind() == Decl::FOK_None ||
!New->getLexicalDeclContext()->isDependentContext()) {
// It's important to use getInit() here; getDefaultArg()
// strips off any top-level ExprWithCleanups.
NewParam->setHasInheritedDefaultArg();
if (OldParam->hasUnparsedDefaultArg())
NewParam->setUnparsedDefaultArg();
else if (OldParam->hasUninstantiatedDefaultArg())
NewParam->setUninstantiatedDefaultArg(
OldParam->getUninstantiatedDefaultArg());
else
NewParam->setDefaultArg(OldParam->getInit());
}
} else if (NewParamHasDfl) {
if (New->getDescribedFunctionTemplate()) {
// Paragraph 4, quoted above, only applies to non-template functions.
Diag(NewParam->getLocation(),
diag::err_param_default_argument_template_redecl)
<< NewParam->getDefaultArgRange();
Diag(PrevForDefaultArgs->getLocation(),
diag::note_template_prev_declaration)
<< false;
} else if (New->getTemplateSpecializationKind()
!= TSK_ImplicitInstantiation &&
New->getTemplateSpecializationKind() != TSK_Undeclared) {
// C++ [temp.expl.spec]p21:
// Default function arguments shall not be specified in a declaration
// or a definition for one of the following explicit specializations:
// - the explicit specialization of a function template;
// - the explicit specialization of a member function template;
// - the explicit specialization of a member function of a class
// template where the class template specialization to which the
// member function specialization belongs is implicitly
// instantiated.
Diag(NewParam->getLocation(), diag::err_template_spec_default_arg)
<< (New->getTemplateSpecializationKind() ==TSK_ExplicitSpecialization)
<< New->getDeclName()
<< NewParam->getDefaultArgRange();
} else if (New->getDeclContext()->isDependentContext()) {
// C++ [dcl.fct.default]p6 (DR217):
// Default arguments for a member function of a class template shall
// be specified on the initial declaration of the member function
// within the class template.
//
// Reading the tea leaves a bit in DR217 and its reference to DR205
// leads me to the conclusion that one cannot add default function
// arguments for an out-of-line definition of a member function of a
// dependent type.
int WhichKind = 2;
if (CXXRecordDecl *Record
= dyn_cast<CXXRecordDecl>(New->getDeclContext())) {
if (Record->getDescribedClassTemplate())
WhichKind = 0;
else if (isa<ClassTemplatePartialSpecializationDecl>(Record))
WhichKind = 1;
else
WhichKind = 2;
}
Diag(NewParam->getLocation(),
diag::err_param_default_argument_member_template_redecl)
<< WhichKind
<< NewParam->getDefaultArgRange();
}
}
}
// DR1344: If a default argument is added outside a class definition and that
// default argument makes the function a special member function, the program
// is ill-formed. This can only happen for constructors.
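// For example, given "struct A { A(int); };", a later out-of-line definition
// "A::A(int i = 0) {}" would turn A(int) into a default constructor and is
// diagnosed here.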
if (isa<CXXConstructorDecl>(New) &&
New->getMinRequiredArguments() < Old->getMinRequiredArguments()) {
CXXSpecialMember NewSM = getSpecialMember(cast<CXXMethodDecl>(New)),
OldSM = getSpecialMember(cast<CXXMethodDecl>(Old));
if (NewSM != OldSM) {
ParmVarDecl *NewParam = New->getParamDecl(New->getMinRequiredArguments());
assert(NewParam->hasDefaultArg());
Diag(NewParam->getLocation(), diag::err_default_arg_makes_ctor_special)
<< NewParam->getDefaultArgRange() << NewSM;
Diag(Old->getLocation(), diag::note_previous_declaration);
}
}
const FunctionDecl *Def;
// C++11 [dcl.constexpr]p1: If any declaration of a function or function
// template has a constexpr specifier then all its declarations shall
// contain the constexpr specifier.
if (New->isConstexpr() != Old->isConstexpr()) {
Diag(New->getLocation(), diag::err_constexpr_redecl_mismatch)
<< New << New->isConstexpr();
Diag(Old->getLocation(), diag::note_previous_declaration);
Invalid = true;
} else if (!Old->getMostRecentDecl()->isInlined() && New->isInlined() &&
Old->isDefined(Def) &&
// If a friend function is inlined but does not have 'inline'
// specifier, it is a definition. Do not report attribute conflict
// in this case, redefinition will be diagnosed later.
(New->isInlineSpecified() ||
New->getFriendObjectKind() == Decl::FOK_None)) {
// C++11 [dcl.fcn.spec]p4:
// If the definition of a function appears in a translation unit before its
// first declaration as inline, the program is ill-formed.
Diag(New->getLocation(), diag::err_inline_decl_follows_def) << New;
Diag(Def->getLocation(), diag::note_previous_definition);
Invalid = true;
}
// FIXME: It's not clear what should happen if multiple declarations of a
// deduction guide have different explicitness. For now at least we simply
// reject any case where the explicitness changes.
auto *NewGuide = dyn_cast<CXXDeductionGuideDecl>(New);
if (NewGuide && NewGuide->isExplicitSpecified() !=
cast<CXXDeductionGuideDecl>(Old)->isExplicitSpecified()) {
Diag(New->getLocation(), diag::err_deduction_guide_explicit_mismatch)
<< NewGuide->isExplicitSpecified();
Diag(Old->getLocation(), diag::note_previous_declaration);
}
// C++11 [dcl.fct.default]p4: If a friend declaration specifies a default
// argument expression, that declaration shall be a definition and shall be
// the only declaration of the function or function template in the
// translation unit.
if (Old->getFriendObjectKind() == Decl::FOK_Undeclared &&
functionDeclHasDefaultArgument(Old)) {
Diag(New->getLocation(), diag::err_friend_decl_with_def_arg_redeclared);
Diag(Old->getLocation(), diag::note_previous_declaration);
Invalid = true;
}
return Invalid;
}
NamedDecl *
Sema::ActOnDecompositionDeclarator(Scope *S, Declarator &D,
MultiTemplateParamsArg TemplateParamLists) {
assert(D.isDecompositionDeclarator());
const DecompositionDeclarator &Decomp = D.getDecompositionDeclarator();
// The syntax only allows a decomposition declarator as a simple-declaration
// or a for-range-declaration, but we parse it in more cases than that.
if (!D.mayHaveDecompositionDeclarator()) {
Diag(Decomp.getLSquareLoc(), diag::err_decomp_decl_context)
<< Decomp.getSourceRange();
return nullptr;
}
if (!TemplateParamLists.empty()) {
// FIXME: There's no rule against this, but there are also no rules that
// would actually make it usable, so we reject it for now.
Diag(TemplateParamLists.front()->getTemplateLoc(),
diag::err_decomp_decl_template);
return nullptr;
}
Diag(Decomp.getLSquareLoc(), getLangOpts().CPlusPlus1z
? diag::warn_cxx14_compat_decomp_decl
: diag::ext_decomp_decl)
<< Decomp.getSourceRange();
// The semantic context is always just the current context.
DeclContext *const DC = CurContext;
// C++1z [dcl.dcl]/8:
// The decl-specifier-seq shall contain only the type-specifier auto
// and cv-qualifiers.
auto &DS = D.getDeclSpec();
{
SmallVector<StringRef, 8> BadSpecifiers;
SmallVector<SourceLocation, 8> BadSpecifierLocs;
if (auto SCS = DS.getStorageClassSpec()) {
BadSpecifiers.push_back(DeclSpec::getSpecifierName(SCS));
BadSpecifierLocs.push_back(DS.getStorageClassSpecLoc());
}
if (auto TSCS = DS.getThreadStorageClassSpec()) {
BadSpecifiers.push_back(DeclSpec::getSpecifierName(TSCS));
BadSpecifierLocs.push_back(DS.getThreadStorageClassSpecLoc());
}
if (DS.isConstexprSpecified()) {
BadSpecifiers.push_back("constexpr");
BadSpecifierLocs.push_back(DS.getConstexprSpecLoc());
}
if (DS.isInlineSpecified()) {
BadSpecifiers.push_back("inline");
BadSpecifierLocs.push_back(DS.getInlineSpecLoc());
}
if (!BadSpecifiers.empty()) {
auto &&Err = Diag(BadSpecifierLocs.front(), diag::err_decomp_decl_spec);
Err << (int)BadSpecifiers.size()
<< llvm::join(BadSpecifiers.begin(), BadSpecifiers.end(), " ");
// Don't add FixItHints to remove the specifiers; we do still respect
// them when building the underlying variable.
for (auto Loc : BadSpecifierLocs)
Err << SourceRange(Loc, Loc);
}
// We can't recover from it being declared as a typedef.
if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef)
return nullptr;
}
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType R = TInfo->getType();
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DeclarationType))
D.setInvalidType();
// The syntax only allows a single ref-qualifier prior to the decomposition
// declarator. No other declarator chunks are permitted. Also check the type
// specifier here.
if (DS.getTypeSpecType() != DeclSpec::TST_auto ||
D.hasGroupingParens() || D.getNumTypeObjects() > 1 ||
(D.getNumTypeObjects() == 1 &&
D.getTypeObject(0).Kind != DeclaratorChunk::Reference)) {
Diag(Decomp.getLSquareLoc(),
(D.hasGroupingParens() ||
(D.getNumTypeObjects() &&
D.getTypeObject(0).Kind == DeclaratorChunk::Paren))
? diag::err_decomp_decl_parens
: diag::err_decomp_decl_type)
<< R;
// In most cases, there's no actual problem with an explicitly-specified
// type, but a function type won't work here, and ActOnVariableDeclarator
// shouldn't be called for such a type.
if (R->isFunctionType())
D.setInvalidType();
}
// Build the BindingDecls.
SmallVector<BindingDecl*, 8> Bindings;
for (auto &B : D.getDecompositionDeclarator().bindings()) {
// Check for name conflicts.
DeclarationNameInfo NameInfo(B.Name, B.NameLoc);
LookupResult Previous(*this, NameInfo, LookupOrdinaryName,
ForRedeclaration);
LookupName(Previous, S,
/*CreateBuiltins*/DC->getRedeclContext()->isTranslationUnit());
// It's not permitted to shadow a template parameter name.
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(),
Previous.getFoundDecl());
Previous.clear();
}
bool ConsiderLinkage = DC->isFunctionOrMethod() &&
DS.getStorageClassSpec() == DeclSpec::SCS_extern;
FilterLookupForScope(Previous, DC, S, ConsiderLinkage,
/*AllowInlineNamespace*/false);
if (!Previous.empty()) {
auto *Old = Previous.getRepresentativeDecl();
Diag(B.NameLoc, diag::err_redefinition) << B.Name;
Diag(Old->getLocation(), diag::note_previous_definition);
}
auto *BD = BindingDecl::Create(Context, DC, B.NameLoc, B.Name);
PushOnScopeChains(BD, S, true);
Bindings.push_back(BD);
ParsingInitForAutoVars.insert(BD);
}
// There are no prior lookup results for the variable itself, because it
// is unnamed.
DeclarationNameInfo NameInfo((IdentifierInfo *)nullptr,
Decomp.getLSquareLoc());
LookupResult Previous(*this, NameInfo, LookupOrdinaryName, ForRedeclaration);
// Build the variable that holds the non-decomposed object.
bool AddToScope = true;
NamedDecl *New =
ActOnVariableDeclarator(S, D, DC, TInfo, Previous,
MultiTemplateParamsArg(), AddToScope, Bindings);
CurContext->addHiddenDecl(New);
if (isInOpenMPDeclareTargetContext())
checkDeclIsAllowedInOpenMPTarget(nullptr, New);
return New;
}
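// For example, given "int a[2] = {1, 2}; auto [x, y] = a;", the helper below
// binds x and y to the array elements via the GetInit callback.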
static bool checkSimpleDecomposition(
Sema &S, ArrayRef<BindingDecl *> Bindings, ValueDecl *Src,
QualType DecompType, const llvm::APSInt &NumElems, QualType ElemType,
llvm::function_ref<ExprResult(SourceLocation, Expr *, unsigned)> GetInit) {
if ((int64_t)Bindings.size() != NumElems) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << NumElems.toString(10)
<< (NumElems < Bindings.size());
return true;
}
unsigned I = 0;
for (auto *B : Bindings) {
SourceLocation Loc = B->getLocation();
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
E = GetInit(Loc, E.get(), I++);
if (E.isInvalid())
return true;
B->setBinding(ElemType, E.get());
}
return false;
}
static bool checkArrayLikeDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
ValueDecl *Src, QualType DecompType,
const llvm::APSInt &NumElems,
QualType ElemType) {
return checkSimpleDecomposition(
S, Bindings, Src, DecompType, NumElems, ElemType,
[&](SourceLocation Loc, Expr *Base, unsigned I) -> ExprResult {
ExprResult E = S.ActOnIntegerConstant(Loc, I);
if (E.isInvalid())
return ExprError();
return S.CreateBuiltinArraySubscriptExpr(Base, Loc, E.get(), Loc);
});
}
static bool checkArrayDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const ConstantArrayType *CAT) {
return checkArrayLikeDecomposition(S, Bindings, Src, DecompType,
llvm::APSInt(CAT->getSize()),
CAT->getElementType());
}
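// Editor's note: an illustrative example (not part of the original source) of
// array decomposition; each binding names the corresponding element:
//   int arr[2] = {1, 2};
//   auto [a, b] = arr;      // a refers to arr[0], b to arr[1]
//   auto [x] = arr;         // error: 1 binding for an array of 2 elements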
static bool checkVectorDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const VectorType *VT) {
return checkArrayLikeDecomposition(
S, Bindings, Src, DecompType, llvm::APSInt::get(VT->getNumElements()),
S.Context.getQualifiedType(VT->getElementType(),
DecompType.getQualifiers()));
}
static bool checkComplexDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
ValueDecl *Src, QualType DecompType,
const ComplexType *CT) {
return checkSimpleDecomposition(
S, Bindings, Src, DecompType, llvm::APSInt::get(2),
S.Context.getQualifiedType(CT->getElementType(),
DecompType.getQualifiers()),
[&](SourceLocation Loc, Expr *Base, unsigned I) -> ExprResult {
return S.CreateBuiltinUnaryOp(Loc, I ? UO_Imag : UO_Real, Base);
});
}
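// Editor's note: an illustrative example (not part of the original source).
// Decomposing _Complex is a Clang extension; the two bindings are formed with
// __real and __imag respectively:
//   _Complex double z = 1.0;
//   auto [re, im] = z;      // re is __real z, im is __imag z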
static std::string printTemplateArgs(const PrintingPolicy &PrintingPolicy,
TemplateArgumentListInfo &Args) {
SmallString<128> SS;
llvm::raw_svector_ostream OS(SS);
bool First = true;
for (auto &Arg : Args.arguments()) {
if (!First)
OS << ", ";
Arg.getArgument().print(PrintingPolicy, OS);
First = false;
}
return OS.str();
}
static bool lookupStdTypeTraitMember(Sema &S, LookupResult &TraitMemberLookup,
SourceLocation Loc, StringRef Trait,
TemplateArgumentListInfo &Args,
unsigned DiagID) {
auto DiagnoseMissing = [&] {
if (DiagID)
S.Diag(Loc, DiagID) << printTemplateArgs(S.Context.getPrintingPolicy(),
Args);
return true;
};
// FIXME: Factor out duplication with lookupPromiseType in SemaCoroutine.
NamespaceDecl *Std = S.getStdNamespace();
if (!Std)
return DiagnoseMissing();
// Look up the trait itself, within namespace std. We can diagnose various
// problems with this lookup even if we've been asked to not diagnose a
// missing specialization, because this can only fail if the user has been
// declaring their own names in namespace std or we don't support the
// standard library implementation in use.
LookupResult Result(S, &S.PP.getIdentifierTable().get(Trait),
Loc, Sema::LookupOrdinaryName);
if (!S.LookupQualifiedName(Result, Std))
return DiagnoseMissing();
if (Result.isAmbiguous())
return true;
ClassTemplateDecl *TraitTD = Result.getAsSingle<ClassTemplateDecl>();
if (!TraitTD) {
Result.suppressDiagnostics();
NamedDecl *Found = *Result.begin();
S.Diag(Loc, diag::err_std_type_trait_not_class_template) << Trait;
S.Diag(Found->getLocation(), diag::note_declared_at);
return true;
}
// Build the template-id.
QualType TraitTy = S.CheckTemplateIdType(TemplateName(TraitTD), Loc, Args);
if (TraitTy.isNull())
return true;
if (!S.isCompleteType(Loc, TraitTy)) {
if (DiagID)
S.RequireCompleteType(
Loc, TraitTy, DiagID,
printTemplateArgs(S.Context.getPrintingPolicy(), Args));
return true;
}
CXXRecordDecl *RD = TraitTy->getAsCXXRecordDecl();
assert(RD && "specialization of class template is not a class?");
// Look up the member of the trait type.
S.LookupQualifiedName(TraitMemberLookup, RD);
return TraitMemberLookup.isAmbiguous();
}
static TemplateArgumentLoc
getTrivialIntegralTemplateArgument(Sema &S, SourceLocation Loc, QualType T,
uint64_t I) {
TemplateArgument Arg(S.Context, S.Context.MakeIntValue(I, T), T);
return S.getTrivialTemplateArgumentLoc(Arg, T, Loc);
}
static TemplateArgumentLoc
getTrivialTypeTemplateArgument(Sema &S, SourceLocation Loc, QualType T) {
return S.getTrivialTemplateArgumentLoc(TemplateArgument(T), QualType(), Loc);
}
namespace { enum class IsTupleLike { TupleLike, NotTupleLike, Error }; }
static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T,
llvm::APSInt &Size) {
EnterExpressionEvaluationContext ContextRAII(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
DeclarationName Value = S.PP.getIdentifierInfo("value");
LookupResult R(S, Value, Loc, Sema::LookupOrdinaryName);
// Form template argument list for tuple_size<T>.
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T));
// If there's no tuple_size specialization, it's not tuple-like.
if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/0))
return IsTupleLike::NotTupleLike;
// If we get this far, we've committed to the tuple interpretation, but
// we can still fail if there actually isn't a usable ::value.
struct ICEDiagnoser : Sema::VerifyICEDiagnoser {
LookupResult &R;
TemplateArgumentListInfo &Args;
ICEDiagnoser(LookupResult &R, TemplateArgumentListInfo &Args)
: R(R), Args(Args) {}
void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) {
S.Diag(Loc, diag::err_decomp_decl_std_tuple_size_not_constant)
<< printTemplateArgs(S.Context.getPrintingPolicy(), Args);
}
} Diagnoser(R, Args);
if (R.empty()) {
Diagnoser.diagnoseNotICE(S, Loc, SourceRange());
return IsTupleLike::Error;
}
ExprResult E =
S.BuildDeclarationNameExpr(CXXScopeSpec(), R, /*NeedsADL*/false);
if (E.isInvalid())
return IsTupleLike::Error;
E = S.VerifyIntegerConstantExpression(E.get(), &Size, Diagnoser, false);
if (E.isInvalid())
return IsTupleLike::Error;
return IsTupleLike::TupleLike;
}
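// Editor's note: an illustrative sketch (not part of the original source) of
// what the lookup above detects; the type 'Pair' is hypothetical:
//   struct Pair { int a; float b; };
//   namespace std {
//     template<> struct tuple_size<Pair>
//         : integral_constant<size_t, 2> {};
//   }
//   // isTupleLike now yields TupleLike with Size == 2 for 'Pair'.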
/// \return std::tuple_element<I, T>::type.
static QualType getTupleLikeElementType(Sema &S, SourceLocation Loc,
unsigned I, QualType T) {
// Form template argument list for tuple_element<I, T>.
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(
getTrivialIntegralTemplateArgument(S, Loc, S.Context.getSizeType(), I));
Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T));
DeclarationName TypeDN = S.PP.getIdentifierInfo("type");
LookupResult R(S, TypeDN, Loc, Sema::LookupOrdinaryName);
if (lookupStdTypeTraitMember(
S, R, Loc, "tuple_element", Args,
diag::err_decomp_decl_std_tuple_element_not_specialized))
return QualType();
auto *TD = R.getAsSingle<TypeDecl>();
if (!TD) {
R.suppressDiagnostics();
S.Diag(Loc, diag::err_decomp_decl_std_tuple_element_not_specialized)
<< printTemplateArgs(S.Context.getPrintingPolicy(), Args);
if (!R.empty())
S.Diag(R.getRepresentativeDecl()->getLocation(), diag::note_declared_at);
return QualType();
}
return S.Context.getTypeDeclType(TD);
}
namespace {
struct BindingDiagnosticTrap {
Sema &S;
DiagnosticErrorTrap Trap;
BindingDecl *BD;
BindingDiagnosticTrap(Sema &S, BindingDecl *BD)
: S(S), Trap(S.Diags), BD(BD) {}
~BindingDiagnosticTrap() {
if (Trap.hasErrorOccurred())
S.Diag(BD->getLocation(), diag::note_in_binding_decl_init) << BD;
}
};
}
static bool checkTupleLikeDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
VarDecl *Src, QualType DecompType,
const llvm::APSInt &TupleSize) {
if ((int64_t)Bindings.size() != TupleSize) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << TupleSize.toString(10)
<< (TupleSize < Bindings.size());
return true;
}
if (Bindings.empty())
return false;
DeclarationName GetDN = S.PP.getIdentifierInfo("get");
// [dcl.decomp]p3:
// The unqualified-id get is looked up in the scope of E by class member
// access lookup
LookupResult MemberGet(S, GetDN, Src->getLocation(), Sema::LookupMemberName);
bool UseMemberGet = false;
if (S.isCompleteType(Src->getLocation(), DecompType)) {
if (auto *RD = DecompType->getAsCXXRecordDecl())
S.LookupQualifiedName(MemberGet, RD);
if (MemberGet.isAmbiguous())
return true;
UseMemberGet = !MemberGet.empty();
S.FilterAcceptableTemplateNames(MemberGet);
}
unsigned I = 0;
for (auto *B : Bindings) {
BindingDiagnosticTrap Trap(S, B);
SourceLocation Loc = B->getLocation();
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
// e is an lvalue if the type of the entity is an lvalue reference and
// an xvalue otherwise
if (!Src->getType()->isLValueReferenceType())
E = ImplicitCastExpr::Create(S.Context, E.get()->getType(), CK_NoOp,
E.get(), nullptr, VK_XValue);
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(
getTrivialIntegralTemplateArgument(S, Loc, S.Context.getSizeType(), I));
if (UseMemberGet) {
// if [lookup of member get] finds at least one declaration, the
// initializer is e.get<i-1>().
E = S.BuildMemberReferenceExpr(E.get(), DecompType, Loc, false,
CXXScopeSpec(), SourceLocation(), nullptr,
MemberGet, &Args, nullptr);
if (E.isInvalid())
return true;
E = S.ActOnCallExpr(nullptr, E.get(), Loc, None, Loc);
} else {
// Otherwise, the initializer is get<i-1>(e), where get is looked up
// in the associated namespaces.
Expr *Get = UnresolvedLookupExpr::Create(
S.Context, nullptr, NestedNameSpecifierLoc(), SourceLocation(),
DeclarationNameInfo(GetDN, Loc), /*RequiresADL*/true, &Args,
UnresolvedSetIterator(), UnresolvedSetIterator());
Expr *Arg = E.get();
E = S.ActOnCallExpr(nullptr, Get, Loc, Arg, Loc);
}
if (E.isInvalid())
return true;
Expr *Init = E.get();
// Given the type T designated by std::tuple_element<i - 1, E>::type,
QualType T = getTupleLikeElementType(S, Loc, I, DecompType);
if (T.isNull())
return true;
// each vi is a variable of type "reference to T" initialized with the
// initializer, where the reference is an lvalue reference if the
// initializer is an lvalue and an rvalue reference otherwise
QualType RefType =
S.BuildReferenceType(T, E.get()->isLValue(), Loc, B->getDeclName());
if (RefType.isNull())
return true;
auto *RefVD = VarDecl::Create(
S.Context, Src->getDeclContext(), Loc, Loc,
B->getDeclName().getAsIdentifierInfo(), RefType,
S.Context.getTrivialTypeSourceInfo(T, Loc), Src->getStorageClass());
RefVD->setLexicalDeclContext(Src->getLexicalDeclContext());
RefVD->setTSCSpec(Src->getTSCSpec());
RefVD->setImplicit();
if (Src->isInlineSpecified())
RefVD->setInlineSpecified();
RefVD->getLexicalDeclContext()->addHiddenDecl(RefVD);
InitializedEntity Entity = InitializedEntity::InitializeBinding(RefVD);
InitializationKind Kind = InitializationKind::CreateCopy(Loc, Loc);
InitializationSequence Seq(S, Entity, Kind, Init);
E = Seq.Perform(S, Entity, Kind, Init);
if (E.isInvalid())
return true;
E = S.ActOnFinishFullExpr(E.get(), Loc);
if (E.isInvalid())
return true;
RefVD->setInit(E.get());
RefVD->checkInitIsICE();
E = S.BuildDeclarationNameExpr(CXXScopeSpec(),
DeclarationNameInfo(B->getDeclName(), Loc),
RefVD);
if (E.isInvalid())
return true;
B->setBinding(T, E.get());
I++;
}
return false;
}
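// Editor's note: an illustrative example (not part of the original source).
// For a tuple-like type, each binding is initialized from e.get<i-1>() if
// member lookup for 'get' succeeds, and from get<i-1>(e) found by ADL
// otherwise; the bound types come from std::tuple_element:
//   auto [a, b] = std::tuple<int, int>(1, 2);   // uses ADL get<0>/get<1>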
/// Find the base class to decompose in a built-in decomposition of a class type.
/// This base class search is, unfortunately, not quite like any other that we
/// perform anywhere else in C++.
static const CXXRecordDecl *findDecomposableBaseClass(Sema &S,
SourceLocation Loc,
const CXXRecordDecl *RD,
CXXCastPath &BasePath) {
auto BaseHasFields = [](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) {
return Specifier->getType()->getAsCXXRecordDecl()->hasDirectFields();
};
const CXXRecordDecl *ClassWithFields = nullptr;
if (RD->hasDirectFields())
// [dcl.decomp]p4:
// Otherwise, all of E's non-static data members shall be public direct
// members of E ...
ClassWithFields = RD;
else {
// ... or of ...
CXXBasePaths Paths;
Paths.setOrigin(const_cast<CXXRecordDecl*>(RD));
if (!RD->lookupInBases(BaseHasFields, Paths)) {
// If no classes have fields, just decompose RD itself. (This will work
// if and only if zero bindings were provided.)
return RD;
}
CXXBasePath *BestPath = nullptr;
for (auto &P : Paths) {
if (!BestPath)
BestPath = &P;
else if (!S.Context.hasSameType(P.back().Base->getType(),
BestPath->back().Base->getType())) {
// ... the same ...
S.Diag(Loc, diag::err_decomp_decl_multiple_bases_with_members)
<< false << RD << BestPath->back().Base->getType()
<< P.back().Base->getType();
return nullptr;
} else if (P.Access < BestPath->Access) {
BestPath = &P;
}
}
// ... unambiguous ...
QualType BaseType = BestPath->back().Base->getType();
if (Paths.isAmbiguous(S.Context.getCanonicalType(BaseType))) {
S.Diag(Loc, diag::err_decomp_decl_ambiguous_base)
<< RD << BaseType << S.getAmbiguousPathsDisplayString(Paths);
return nullptr;
}
// ... public base class of E.
if (BestPath->Access != AS_public) {
S.Diag(Loc, diag::err_decomp_decl_non_public_base)
<< RD << BaseType;
for (auto &BS : *BestPath) {
if (BS.Base->getAccessSpecifier() != AS_public) {
S.Diag(BS.Base->getLocStart(), diag::note_access_constrained_by_path)
<< (BS.Base->getAccessSpecifier() == AS_protected)
<< (BS.Base->getAccessSpecifierAsWritten() == AS_none);
break;
}
}
return nullptr;
}
ClassWithFields = BaseType->getAsCXXRecordDecl();
S.BuildBasePathArray(Paths, BasePath);
}
// The above search did not check whether the selected class itself has base
// classes with fields, so check that now.
CXXBasePaths Paths;
if (ClassWithFields->lookupInBases(BaseHasFields, Paths)) {
S.Diag(Loc, diag::err_decomp_decl_multiple_bases_with_members)
<< (ClassWithFields == RD) << RD << ClassWithFields
<< Paths.front().back().Base->getType();
return nullptr;
}
return ClassWithFields;
}
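// Editor's note: an illustrative example (not part of the original source) of
// the base-class search above:
//   struct A { int x; };
//   struct B : A {};        // B has no direct fields
//   auto [x] = B{};         // OK: decomposes the unique base A
//   struct C : A { int y; };
//   auto [p, q] = C{};      // error: both C and its base A have members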
static bool checkMemberDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const CXXRecordDecl *RD) {
CXXCastPath BasePath;
RD = findDecomposableBaseClass(S, Src->getLocation(), RD, BasePath);
if (!RD)
return true;
QualType BaseType = S.Context.getQualifiedType(S.Context.getRecordType(RD),
DecompType.getQualifiers());
auto DiagnoseBadNumberOfBindings = [&]() -> bool {
unsigned NumFields =
std::count_if(RD->field_begin(), RD->field_end(),
[](FieldDecl *FD) { return !FD->isUnnamedBitfield(); });
assert(Bindings.size() != NumFields);
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << NumFields
<< (NumFields < Bindings.size());
return true;
};
// all of E's non-static data members shall be public [...] members,
// E shall not have an anonymous union member, ...
unsigned I = 0;
for (auto *FD : RD->fields()) {
if (FD->isUnnamedBitfield())
continue;
if (FD->isAnonymousStructOrUnion()) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_anon_union_member)
<< DecompType << FD->getType()->isUnionType();
S.Diag(FD->getLocation(), diag::note_declared_at);
return true;
}
// We have a real field to bind.
if (I >= Bindings.size())
return DiagnoseBadNumberOfBindings();
auto *B = Bindings[I++];
SourceLocation Loc = B->getLocation();
if (FD->getAccess() != AS_public) {
S.Diag(Loc, diag::err_decomp_decl_non_public_member) << FD << DecompType;
// Determine whether the access specifier was explicit.
bool Implicit = true;
for (const auto *D : RD->decls()) {
if (declaresSameEntity(D, FD))
break;
if (isa<AccessSpecDecl>(D)) {
Implicit = false;
break;
}
}
S.Diag(FD->getLocation(), diag::note_access_natural)
<< (FD->getAccess() == AS_protected) << Implicit;
return true;
}
// Initialize the binding to Src.FD.
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
E = S.ImpCastExprToType(E.get(), BaseType, CK_UncheckedDerivedToBase,
VK_LValue, &BasePath);
if (E.isInvalid())
return true;
E = S.BuildFieldReferenceExpr(E.get(), /*IsArrow*/ false, Loc,
CXXScopeSpec(), FD,
DeclAccessPair::make(FD, FD->getAccess()),
DeclarationNameInfo(FD->getDeclName(), Loc));
if (E.isInvalid())
return true;
// If the type of the member is T, the referenced type is cv T, where cv is
// the cv-qualification of the decomposition expression.
//
// FIXME: We resolve a defect here: if the field is mutable, we do not add
// 'const' to the type of the field.
Qualifiers Q = DecompType.getQualifiers();
if (FD->isMutable())
Q.removeConst();
B->setBinding(S.BuildQualifiedType(FD->getType(), Loc, Q), E.get());
}
if (I != Bindings.size())
return DiagnoseBadNumberOfBindings();
return false;
}
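// Editor's note: illustrative examples (not part of the original source) of
// the member checks above:
//   struct S { int a; private: int b; };
//   auto [x, y] = S{};      // error: 'b' is not a public member
//   struct U { union { int i; float f; }; };
//   auto [z] = U{};         // error: anonymous union member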
void Sema::CheckCompleteDecompositionDeclaration(DecompositionDecl *DD) {
QualType DecompType = DD->getType();
// If the type of the decomposition is dependent, then so is the type of
// each binding.
if (DecompType->isDependentType()) {
for (auto *B : DD->bindings())
B->setType(Context.DependentTy);
return;
}
DecompType = DecompType.getNonReferenceType();
ArrayRef<BindingDecl*> Bindings = DD->bindings();
// C++1z [dcl.decomp]/2:
// If E is an array type [...]
// As an extension, we also support decomposition of built-in complex and
// vector types.
if (auto *CAT = Context.getAsConstantArrayType(DecompType)) {
if (checkArrayDecomposition(*this, Bindings, DD, DecompType, CAT))
DD->setInvalidDecl();
return;
}
if (auto *VT = DecompType->getAs<VectorType>()) {
if (checkVectorDecomposition(*this, Bindings, DD, DecompType, VT))
DD->setInvalidDecl();
return;
}
if (auto *CT = DecompType->getAs<ComplexType>()) {
if (checkComplexDecomposition(*this, Bindings, DD, DecompType, CT))
DD->setInvalidDecl();
return;
}
// C++1z [dcl.decomp]/3:
// if the expression std::tuple_size<E>::value is a well-formed integral
// constant expression, [...]
llvm::APSInt TupleSize(32);
switch (isTupleLike(*this, DD->getLocation(), DecompType, TupleSize)) {
case IsTupleLike::Error:
DD->setInvalidDecl();
return;
case IsTupleLike::TupleLike:
if (checkTupleLikeDecomposition(*this, Bindings, DD, DecompType, TupleSize))
DD->setInvalidDecl();
return;
case IsTupleLike::NotTupleLike:
break;
}
// C++1z [dcl.dcl]/8:
// [E shall be of array or non-union class type]
CXXRecordDecl *RD = DecompType->getAsCXXRecordDecl();
if (!RD || RD->isUnion()) {
Diag(DD->getLocation(), diag::err_decomp_decl_unbindable_type)
<< DD << !RD << DecompType;
DD->setInvalidDecl();
return;
}
// C++1z [dcl.decomp]/4:
// all of E's non-static data members shall be [...] direct members of
// E or of the same unambiguous public base class of E, ...
if (checkMemberDecomposition(*this, Bindings, DD, DecompType, RD))
DD->setInvalidDecl();
}
/// \brief Merge the exception specifications of two variable declarations.
///
/// This is called when there's a redeclaration of a VarDecl. The function
/// checks whether the redeclaration has an exception specification, validates
/// compatibility, and merges the specifications if necessary.
void Sema::MergeVarDeclExceptionSpecs(VarDecl *New, VarDecl *Old) {
// Shortcut if exceptions are disabled.
if (!getLangOpts().CXXExceptions)
return;
assert(Context.hasSameType(New->getType(), Old->getType()) &&
"Should only be called if types are otherwise the same.");
QualType NewType = New->getType();
QualType OldType = Old->getType();
// We're only interested in pointers and references to functions, as well
// as pointers to member functions.
if (const ReferenceType *R = NewType->getAs<ReferenceType>()) {
NewType = R->getPointeeType();
OldType = OldType->getAs<ReferenceType>()->getPointeeType();
} else if (const PointerType *P = NewType->getAs<PointerType>()) {
NewType = P->getPointeeType();
OldType = OldType->getAs<PointerType>()->getPointeeType();
} else if (const MemberPointerType *M = NewType->getAs<MemberPointerType>()) {
NewType = M->getPointeeType();
OldType = OldType->getAs<MemberPointerType>()->getPointeeType();
}
if (!NewType->isFunctionProtoType())
return;
// There are many special cases for functions. For function pointers, system
// libraries are hopefully not as broken, so we don't need those workarounds
// here.
if (CheckEquivalentExceptionSpec(
OldType->getAs<FunctionProtoType>(), Old->getLocation(),
NewType->getAs<FunctionProtoType>(), New->getLocation())) {
New->setInvalidDecl();
}
}
/// CheckCXXDefaultArguments - Verify that the default arguments for a
/// function declaration are well-formed according to C++
/// [dcl.fct.default].
void Sema::CheckCXXDefaultArguments(FunctionDecl *FD) {
unsigned NumParams = FD->getNumParams();
unsigned p;
// Find first parameter with a default argument
for (p = 0; p < NumParams; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (Param->hasDefaultArg())
break;
}
// C++11 [dcl.fct.default]p4:
// In a given function declaration, each parameter subsequent to a parameter
// with a default argument shall have a default argument supplied in this or
// a previous declaration or shall be a function parameter pack. A default
// argument shall not be redefined by a later declaration (not even to the
// same value).
unsigned LastMissingDefaultArg = 0;
for (; p < NumParams; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (!Param->hasDefaultArg() && !Param->isParameterPack()) {
if (Param->isInvalidDecl())
/* We already complained about this parameter. */;
else if (Param->getIdentifier())
Diag(Param->getLocation(),
diag::err_param_default_argument_missing_name)
<< Param->getIdentifier();
else
Diag(Param->getLocation(),
diag::err_param_default_argument_missing);
LastMissingDefaultArg = p;
}
}
if (LastMissingDefaultArg > 0) {
// Some default arguments were missing. Clear out all of the
// default arguments up to (and including) the last missing
// default argument, so that we leave the function parameters
// in a semantically valid state.
for (p = 0; p <= LastMissingDefaultArg; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (Param->hasDefaultArg()) {
Param->setDefaultArg(nullptr);
}
}
}
}
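// Editor's note: an illustrative example (not part of the original source) of
// C++11 [dcl.fct.default]p4 as enforced above:
//   void f(int a, int b = 1, int c);      // error: 'c' is missing a default
//                                         // argument
//   void g(int a, int b = 1, int c = 2);  // OK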
// CheckConstexprParameterTypes - Check whether a function's parameter types
// are all literal types. If so, return true. If not, produce a suitable
// diagnostic and return false.
static bool CheckConstexprParameterTypes(Sema &SemaRef,
const FunctionDecl *FD) {
unsigned ArgIndex = 0;
const FunctionProtoType *FT = FD->getType()->getAs<FunctionProtoType>();
for (FunctionProtoType::param_type_iterator i = FT->param_type_begin(),
e = FT->param_type_end();
i != e; ++i, ++ArgIndex) {
const ParmVarDecl *PD = FD->getParamDecl(ArgIndex);
SourceLocation ParamLoc = PD->getLocation();
if (!(*i)->isDependentType() &&
SemaRef.RequireLiteralType(ParamLoc, *i,
diag::err_constexpr_non_literal_param,
ArgIndex+1, PD->getSourceRange(),
isa<CXXConstructorDecl>(FD)))
return false;
}
return true;
}
/// \brief Get diagnostic %select index for tag kind for
/// record diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getRecordDiagFromTagKind(TagTypeKind Tag) {
switch (Tag) {
case TTK_Struct: return 0;
case TTK_Interface: return 1;
case TTK_Class: return 2;
default: llvm_unreachable("Invalid tag kind for record diagnostic!");
}
}
// CheckConstexprFunctionDecl - Check whether a function declaration satisfies
// the requirements of a constexpr function definition or a constexpr
// constructor definition. If so, return true. If not, produce appropriate
// diagnostics and return false.
//
// This implements C++11 [dcl.constexpr]p3,4, as amended by DR1360.
bool Sema::CheckConstexprFunctionDecl(const FunctionDecl *NewFD) {
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewFD);
if (MD && MD->isInstance()) {
// C++11 [dcl.constexpr]p4:
// The definition of a constexpr constructor shall satisfy the following
// constraints:
// - the class shall not have any virtual base classes;
const CXXRecordDecl *RD = MD->getParent();
if (RD->getNumVBases()) {
Diag(NewFD->getLocation(), diag::err_constexpr_virtual_base)
<< isa<CXXConstructorDecl>(NewFD)
<< getRecordDiagFromTagKind(RD->getTagKind()) << RD->getNumVBases();
for (const auto &I : RD->vbases())
Diag(I.getLocStart(),
diag::note_constexpr_virtual_base_here) << I.getSourceRange();
return false;
}
}
if (!isa<CXXConstructorDecl>(NewFD)) {
// C++11 [dcl.constexpr]p3:
// The definition of a constexpr function shall satisfy the following
// constraints:
// - it shall not be virtual;
const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(NewFD);
if (Method && Method->isVirtual()) {
Method = Method->getCanonicalDecl();
Diag(Method->getLocation(), diag::err_constexpr_virtual);
// If it's not obvious why this function is virtual, find an overridden
// function which uses the 'virtual' keyword.
const CXXMethodDecl *WrittenVirtual = Method;
while (!WrittenVirtual->isVirtualAsWritten())
WrittenVirtual = *WrittenVirtual->begin_overridden_methods();
if (WrittenVirtual != Method)
Diag(WrittenVirtual->getLocation(),
diag::note_overridden_virtual_function);
return false;
}
// - its return type shall be a literal type;
QualType RT = NewFD->getReturnType();
if (!RT->isDependentType() &&
RequireLiteralType(NewFD->getLocation(), RT,
diag::err_constexpr_non_literal_return))
return false;
}
// - each of its parameter types shall be a literal type;
if (!CheckConstexprParameterTypes(*this, NewFD))
return false;
return true;
}
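// Editor's note: illustrative examples (not part of the original source) of
// declarations rejected by the C++11/C++14 rules implemented above:
//   struct Base {};
//   struct V : virtual Base { constexpr V(); };    // error: virtual base
//   struct W { virtual constexpr int f() const; }; // error: constexpr
//                                                  // function can't be virtual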
/// Check that the given declaration statement is legal within a constexpr
/// function body. C++11 [dcl.constexpr]p3,p4, and C++1y [dcl.constexpr]p3.
///
/// \return true if the body is OK (maybe only as an extension), false if we
/// have diagnosed a problem.
static bool CheckConstexprDeclStmt(Sema &SemaRef, const FunctionDecl *Dcl,
DeclStmt *DS, SourceLocation &Cxx1yLoc) {
// C++11 [dcl.constexpr]p3 and p4:
// The definition of a constexpr function(p3) or constructor(p4) [...] shall
// contain only
for (const auto *DclIt : DS->decls()) {
switch (DclIt->getKind()) {
case Decl::StaticAssert:
case Decl::Using:
case Decl::UsingShadow:
case Decl::UsingDirective:
case Decl::UnresolvedUsingTypename:
case Decl::UnresolvedUsingValue:
// - static_assert-declarations
// - using-declarations,
// - using-directives,
continue;
case Decl::Typedef:
case Decl::TypeAlias: {
// - typedef declarations and alias-declarations that do not define
// classes or enumerations,
const auto *TN = cast<TypedefNameDecl>(DclIt);
if (TN->getUnderlyingType()->isVariablyModifiedType()) {
// Don't allow variably-modified types in constexpr functions.
TypeLoc TL = TN->getTypeSourceInfo()->getTypeLoc();
SemaRef.Diag(TL.getBeginLoc(), diag::err_constexpr_vla)
<< TL.getSourceRange() << TL.getType()
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
continue;
}
case Decl::Enum:
case Decl::CXXRecord:
// C++1y allows types to be defined, not just declared.
if (cast<TagDecl>(DclIt)->isThisDeclarationADefinition())
SemaRef.Diag(DS->getLocStart(),
SemaRef.getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_type_definition
: diag::ext_constexpr_type_definition)
<< isa<CXXConstructorDecl>(Dcl);
continue;
case Decl::EnumConstant:
case Decl::IndirectField:
case Decl::ParmVar:
// These can only appear with other declarations which are banned in
// C++11 and permitted in C++1y, so ignore them.
continue;
case Decl::Var:
case Decl::Decomposition: {
// C++1y [dcl.constexpr]p3 allows anything except:
// a definition of a variable of non-literal type or of static or
// thread storage duration or for which no initialization is performed.
const auto *VD = cast<VarDecl>(DclIt);
if (VD->isThisDeclarationADefinition()) {
if (VD->isStaticLocal()) {
SemaRef.Diag(VD->getLocation(),
diag::err_constexpr_local_var_static)
<< isa<CXXConstructorDecl>(Dcl)
<< (VD->getTLSKind() == VarDecl::TLS_Dynamic);
return false;
}
if (!VD->getType()->isDependentType() &&
SemaRef.RequireLiteralType(
VD->getLocation(), VD->getType(),
diag::err_constexpr_local_var_non_literal_type,
isa<CXXConstructorDecl>(Dcl)))
return false;
if (!VD->getType()->isDependentType() &&
!VD->hasInit() && !VD->isCXXForRangeDecl()) {
SemaRef.Diag(VD->getLocation(),
diag::err_constexpr_local_var_no_init)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
}
SemaRef.Diag(VD->getLocation(),
SemaRef.getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_local_var
: diag::ext_constexpr_local_var)
<< isa<CXXConstructorDecl>(Dcl);
continue;
}
case Decl::NamespaceAlias:
case Decl::Function:
// These are disallowed in C++11 and permitted in C++1y. Allow them
// everywhere as an extension.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = DS->getLocStart();
continue;
default:
SemaRef.Diag(DS->getLocStart(), diag::err_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
}
return true;
}
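// Editor's note: an illustrative example (not part of the original source).
// C++1y permits mutable locals in a constexpr body; C++11 accepts them only
// as an extension, and statics are always rejected:
//   constexpr int f(int n) {
//     int k = n;            // OK in C++14; extension in C++11
//     static int s = 0;     // error: static storage duration
//     return k;
//   }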
/// Check that the given field is initialized within a constexpr constructor.
///
/// \param Dcl The constexpr constructor being checked.
/// \param Field The field being checked. This may be a member of an anonymous
/// struct or union nested within the class being checked.
/// \param Inits All declarations, including anonymous struct/union members and
/// indirect members, for which any initialization was provided.
/// \param Diagnosed Set to true if an error is produced.
static void CheckConstexprCtorInitializer(Sema &SemaRef,
const FunctionDecl *Dcl,
FieldDecl *Field,
llvm::SmallSet<Decl*, 16> &Inits,
bool &Diagnosed) {
if (Field->isInvalidDecl())
return;
if (Field->isUnnamedBitfield())
return;
// Anonymous unions with no variant members and empty anonymous structs do not
// need to be explicitly initialized. FIXME: Anonymous structs that contain no
// indirect fields don't need initializing.
if (Field->isAnonymousStructOrUnion() &&
(Field->getType()->isUnionType()
? !Field->getType()->getAsCXXRecordDecl()->hasVariantMembers()
: Field->getType()->getAsCXXRecordDecl()->isEmpty()))
return;
if (!Inits.count(Field)) {
if (!Diagnosed) {
SemaRef.Diag(Dcl->getLocation(), diag::err_constexpr_ctor_missing_init);
Diagnosed = true;
}
SemaRef.Diag(Field->getLocation(), diag::note_constexpr_ctor_missing_init);
} else if (Field->isAnonymousStructOrUnion()) {
const RecordDecl *RD = Field->getType()->castAs<RecordType>()->getDecl();
for (auto *I : RD->fields())
// If an anonymous union contains an anonymous struct of which any member
// is initialized, all members must be initialized.
if (!RD->isUnion() || Inits.count(I))
CheckConstexprCtorInitializer(SemaRef, Dcl, I, Inits, Diagnosed);
}
}
/// Check that the provided statement is allowed in a constexpr function
/// definition.
static bool
CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
SmallVectorImpl<SourceLocation> &ReturnStmts,
SourceLocation &Cxx1yLoc) {
// - its function-body shall be [...] a compound-statement that contains only
switch (S->getStmtClass()) {
case Stmt::NullStmtClass:
// - null statements,
return true;
case Stmt::DeclStmtClass:
// - static_assert-declarations
// - using-declarations,
// - using-directives,
// - typedef declarations and alias-declarations that do not define
// classes or enumerations,
if (!CheckConstexprDeclStmt(SemaRef, Dcl, cast<DeclStmt>(S), Cxx1yLoc))
return false;
return true;
case Stmt::ReturnStmtClass:
// - and exactly one return statement;
if (isa<CXXConstructorDecl>(Dcl)) {
// C++1y allows return statements in constexpr constructors.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
}
ReturnStmts.push_back(S->getLocStart());
return true;
case Stmt::CompoundStmtClass: {
// C++1y allows compound-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
CompoundStmt *CompStmt = cast<CompoundStmt>(S);
for (auto *BodyIt : CompStmt->body()) {
if (!CheckConstexprFunctionStmt(SemaRef, Dcl, BodyIt, ReturnStmts,
Cxx1yLoc))
return false;
}
return true;
}
case Stmt::AttributedStmtClass:
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
case Stmt::IfStmtClass: {
// C++1y allows if-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
IfStmt *If = cast<IfStmt>(S);
if (!CheckConstexprFunctionStmt(SemaRef, Dcl, If->getThen(), ReturnStmts,
Cxx1yLoc))
return false;
if (If->getElse() &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, If->getElse(), ReturnStmts,
Cxx1yLoc))
return false;
return true;
}
case Stmt::WhileStmtClass:
case Stmt::DoStmtClass:
case Stmt::ForStmtClass:
case Stmt::CXXForRangeStmtClass:
case Stmt::ContinueStmtClass:
// C++1y allows all of these. We don't allow them as extensions in C++11,
// because they don't make sense without variable mutation.
if (!SemaRef.getLangOpts().CPlusPlus14)
break;
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
for (Stmt *SubStmt : S->children())
if (SubStmt &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
Cxx1yLoc))
return false;
return true;
case Stmt::SwitchStmtClass:
case Stmt::CaseStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::BreakStmtClass:
// C++1y allows switch-statements, and since they don't need variable
// mutation, we can reasonably allow them in C++11 as an extension.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
for (Stmt *SubStmt : S->children())
if (SubStmt &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
Cxx1yLoc))
return false;
return true;
default:
if (!isa<Expr>(S))
break;
// C++1y allows expression-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
}
SemaRef.Diag(S->getLocStart(), diag::err_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
/// Check that the body of the given constexpr function declaration contains
/// only the permitted types of statement. C++11 [dcl.constexpr]p3,p4.
///
/// \return true if the body is OK, false if we have diagnosed a problem.
bool Sema::CheckConstexprFunctionBody(const FunctionDecl *Dcl, Stmt *Body) {
if (isa<CXXTryStmt>(Body)) {
// C++11 [dcl.constexpr]p3:
// The definition of a constexpr function shall satisfy the following
// constraints: [...]
// - its function-body shall be = delete, = default, or a
// compound-statement
//
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor, [...]
// - its function-body shall not be a function-try-block;
Diag(Body->getLocStart(), diag::err_constexpr_function_try_block)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
SmallVector<SourceLocation, 4> ReturnStmts;
// - its function-body shall be [...] a compound-statement that contains only
// [... list of cases ...]
CompoundStmt *CompBody = cast<CompoundStmt>(Body);
SourceLocation Cxx1yLoc;
for (auto *BodyIt : CompBody->body()) {
if (!CheckConstexprFunctionStmt(*this, Dcl, BodyIt, ReturnStmts, Cxx1yLoc))
return false;
}
if (Cxx1yLoc.isValid())
Diag(Cxx1yLoc,
getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_body_invalid_stmt
: diag::ext_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
if (const CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(Dcl)) {
const CXXRecordDecl *RD = Constructor->getParent();
// DR1359:
// - every non-variant non-static data member and base class sub-object
// shall be initialized;
// DR1460:
// - if the class is a union having variant members, exactly one of them
// shall be initialized;
if (RD->isUnion()) {
if (Constructor->getNumCtorInitializers() == 0 &&
RD->hasVariantMembers()) {
Diag(Dcl->getLocation(), diag::err_constexpr_union_ctor_no_init);
return false;
}
} else if (!Constructor->isDependentContext() &&
!Constructor->isDelegatingConstructor()) {
assert(RD->getNumVBases() == 0 && "constexpr ctor with virtual bases");
// Skip detailed checking if we have enough initializers, and we would
// allow at most one initializer per member.
bool AnyAnonStructUnionMembers = false;
unsigned Fields = 0;
for (CXXRecordDecl::field_iterator I = RD->field_begin(),
E = RD->field_end(); I != E; ++I, ++Fields) {
if (I->isAnonymousStructOrUnion()) {
AnyAnonStructUnionMembers = true;
break;
}
}
// DR1460:
// - if the class is a union-like class, but is not a union, for each of
// its anonymous union members having variant members, exactly one of
// them shall be initialized;
if (AnyAnonStructUnionMembers ||
Constructor->getNumCtorInitializers() != RD->getNumBases() + Fields) {
// Check initialization of non-static data members. Base classes are
// always initialized so do not need to be checked. Dependent bases
// might not have initializers in the member initializer list.
llvm::SmallSet<Decl*, 16> Inits;
for (const auto *I: Constructor->inits()) {
if (FieldDecl *FD = I->getMember())
Inits.insert(FD);
else if (IndirectFieldDecl *ID = I->getIndirectMember())
Inits.insert(ID->chain_begin(), ID->chain_end());
}
bool Diagnosed = false;
for (auto *I : RD->fields())
CheckConstexprCtorInitializer(*this, Dcl, I, Inits, Diagnosed);
if (Diagnosed)
return false;
}
}
} else {
if (ReturnStmts.empty()) {
// C++1y doesn't require constexpr functions to contain a 'return'
// statement. We still do, unless the return type might be void, because
// otherwise if there's no return statement, the function cannot
// be used in a core constant expression.
bool OK = getLangOpts().CPlusPlus14 &&
(Dcl->getReturnType()->isVoidType() ||
Dcl->getReturnType()->isDependentType());
Diag(Dcl->getLocation(),
OK ? diag::warn_cxx11_compat_constexpr_body_no_return
: diag::err_constexpr_body_no_return);
if (!OK)
return false;
} else if (ReturnStmts.size() > 1) {
Diag(ReturnStmts.back(),
getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_body_multiple_return
: diag::ext_constexpr_body_multiple_return);
for (unsigned I = 0; I < ReturnStmts.size() - 1; ++I)
Diag(ReturnStmts[I], diag::note_constexpr_body_previous_return);
}
}
// C++11 [dcl.constexpr]p5:
// if no function argument values exist such that the function invocation
// substitution would produce a constant expression, the program is
// ill-formed; no diagnostic required.
// C++11 [dcl.constexpr]p3:
// - every constructor call and implicit conversion used in initializing the
// return value shall be one of those allowed in a constant expression.
// C++11 [dcl.constexpr]p4:
// - every constructor involved in initializing non-static data members and
// base class sub-objects shall be a constexpr constructor.
SmallVector<PartialDiagnosticAt, 8> Diags;
if (!Expr::isPotentialConstantExpr(Dcl, Diags)) {
Diag(Dcl->getLocation(), diag::ext_constexpr_function_never_constant_expr)
<< isa<CXXConstructorDecl>(Dcl);
for (size_t I = 0, N = Diags.size(); I != N; ++I)
Diag(Diags[I].first, Diags[I].second);
// Don't return false here: we allow this for compatibility in
// system headers.
}
return true;
}
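// Editor's note: an illustrative example (not part of the original source) of
// the return-statement rules enforced above:
//   constexpr int f(bool b) {
//     if (b) return 1;      // C++14: fine; C++11: the if-statement and the
//     return 0;             // second return are accepted only as extensions
//   }
//   constexpr void g() {}   // C++14: no return needed for a void return type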
/// isCurrentClassName - Determine whether the identifier II is the
/// name of the class type currently being defined. In the case of
/// nested classes, this will only return true if II is the name of
/// the innermost class.
bool Sema::isCurrentClassName(const IdentifierInfo &II, Scope *,
const CXXScopeSpec *SS) {
assert(getLangOpts().CPlusPlus && "No class names in C!");
CXXRecordDecl *CurDecl;
if (SS && SS->isSet() && !SS->isInvalid()) {
DeclContext *DC = computeDeclContext(*SS, true);
CurDecl = dyn_cast_or_null<CXXRecordDecl>(DC);
} else
CurDecl = dyn_cast_or_null<CXXRecordDecl>(CurContext);
if (CurDecl && CurDecl->getIdentifier())
return &II == CurDecl->getIdentifier();
return false;
}
/// \brief Determine whether the identifier II is a typo for the name of
/// the class type currently being defined. If so, update it to the identifier
/// that should have been used.
bool Sema::isCurrentClassNameTypo(IdentifierInfo *&II, const CXXScopeSpec *SS) {
assert(getLangOpts().CPlusPlus && "No class names in C!");
if (!getLangOpts().SpellChecking)
return false;
CXXRecordDecl *CurDecl;
if (SS && SS->isSet() && !SS->isInvalid()) {
DeclContext *DC = computeDeclContext(*SS, true);
CurDecl = dyn_cast_or_null<CXXRecordDecl>(DC);
} else
CurDecl = dyn_cast_or_null<CXXRecordDecl>(CurContext);
if (CurDecl && CurDecl->getIdentifier() && II != CurDecl->getIdentifier() &&
3 * II->getName().edit_distance(CurDecl->getIdentifier()->getName())
< II->getLength()) {
II = CurDecl->getIdentifier();
return true;
}
return false;
}
/// \brief Determine whether the given class is a base class of the given
/// class, including looking at dependent bases.
static bool findCircularInheritance(const CXXRecordDecl *Class,
const CXXRecordDecl *Current) {
SmallVector<const CXXRecordDecl*, 8> Queue;
Class = Class->getCanonicalDecl();
while (true) {
for (const auto &I : Current->bases()) {
CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl();
if (!Base)
continue;
Base = Base->getDefinition();
if (!Base)
continue;
if (Base->getCanonicalDecl() == Class)
return true;
Queue.push_back(Base);
}
if (Queue.empty())
return false;
Current = Queue.pop_back_val();
}
return false;
}
/// \brief Check the validity of a C++ base class specifier.
///
/// \returns a new CXXBaseSpecifier if well-formed, emits diagnostics
/// and returns NULL otherwise.
CXXBaseSpecifier *
Sema::CheckBaseSpecifier(CXXRecordDecl *Class,
SourceRange SpecifierRange,
bool Virtual, AccessSpecifier Access,
TypeSourceInfo *TInfo,
SourceLocation EllipsisLoc) {
QualType BaseType = TInfo->getType();
// C++ [class.union]p1:
// A union shall not have base classes.
if (Class->isUnion()) {
Diag(Class->getLocation(), diag::err_base_clause_on_union)
<< SpecifierRange;
return nullptr;
}
if (EllipsisLoc.isValid() &&
!TInfo->getType()->containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< TInfo->getTypeLoc().getSourceRange();
EllipsisLoc = SourceLocation();
}
SourceLocation BaseLoc = TInfo->getTypeLoc().getBeginLoc();
if (BaseType->isDependentType()) {
// Make sure that we don't have circular inheritance among our dependent
// bases. For non-dependent bases, the check for completeness below handles
// this.
if (CXXRecordDecl *BaseDecl = BaseType->getAsCXXRecordDecl()) {
if (BaseDecl->getCanonicalDecl() == Class->getCanonicalDecl() ||
((BaseDecl = BaseDecl->getDefinition()) &&
findCircularInheritance(Class, BaseDecl))) {
Diag(BaseLoc, diag::err_circular_inheritance)
<< BaseType << Context.getTypeDeclType(Class);
if (BaseDecl->getCanonicalDecl() != Class->getCanonicalDecl())
Diag(BaseDecl->getLocation(), diag::note_previous_decl)
<< BaseType;
return nullptr;
}
}
return new (Context) CXXBaseSpecifier(SpecifierRange, Virtual,
Class->getTagKind() == TTK_Class,
Access, TInfo, EllipsisLoc);
}
// Base specifiers must be record types.
if (!BaseType->isRecordType()) {
Diag(BaseLoc, diag::err_base_must_be_class) << SpecifierRange;
return nullptr;
}
// C++ [class.union]p1:
// A union shall not be used as a base class.
if (BaseType->isUnionType()) {
Diag(BaseLoc, diag::err_union_as_base_class) << SpecifierRange;
return nullptr;
}
// For the MS ABI, propagate DLL attributes to base class templates.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
if (Attr *ClassAttr = getDLLAttr(Class)) {
if (auto *BaseTemplate = dyn_cast_or_null<ClassTemplateSpecializationDecl>(
BaseType->getAsCXXRecordDecl())) {
propagateDLLAttrToBaseClassTemplate(Class, ClassAttr, BaseTemplate,
BaseLoc);
}
}
}
// C++ [class.derived]p2:
// The class-name in a base-specifier shall not be an incompletely
// defined class.
if (RequireCompleteType(BaseLoc, BaseType,
diag::err_incomplete_base_class, SpecifierRange)) {
Class->setInvalidDecl();
return nullptr;
}
// If the base class is polymorphic or non-empty, the derived class is as well.
RecordDecl *BaseDecl = BaseType->getAs<RecordType>()->getDecl();
assert(BaseDecl && "Record type has no declaration");
BaseDecl = BaseDecl->getDefinition();
assert(BaseDecl && "Base type is not incomplete, but has no definition");
CXXRecordDecl *CXXBaseDecl = cast<CXXRecordDecl>(BaseDecl);
assert(CXXBaseDecl && "Base type is not a C++ type");
// A class which contains a flexible array member is not suitable for use as a
// base class:
// - If the layout determines that a base comes before another base,
// the flexible array member would index into the subsequent base.
// - If the layout determines that the base comes before the derived class,
// the flexible array member would index into the derived class.
if (CXXBaseDecl->hasFlexibleArrayMember()) {
Diag(BaseLoc, diag::err_base_class_has_flexible_array_member)
<< CXXBaseDecl->getDeclName();
return nullptr;
}
// C++ [class]p3:
// If a class is marked final and it appears as a base-type-specifier in
// base-clause, the program is ill-formed.
if (FinalAttr *FA = CXXBaseDecl->getAttr<FinalAttr>()) {
Diag(BaseLoc, diag::err_class_marked_final_used_as_base)
<< CXXBaseDecl->getDeclName()
<< FA->isSpelledAsSealed();
Diag(CXXBaseDecl->getLocation(), diag::note_entity_declared_at)
<< CXXBaseDecl->getDeclName() << FA->getRange();
return nullptr;
}
if (BaseDecl->isInvalidDecl())
Class->setInvalidDecl();
// Create the base specifier.
return new (Context) CXXBaseSpecifier(SpecifierRange, Virtual,
Class->getTagKind() == TTK_Class,
Access, TInfo, EllipsisLoc);
}
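// Editor's note: illustrative examples (not part of the original source) of
// ill-formed base specifiers rejected above:
//   union U {};
//   struct D1 : U {};       // error: a union cannot be a base class
//   struct F final {};
//   struct D2 : F {};       // error: base 'F' is marked 'final'
//   struct Fwd;
//   struct D3 : Fwd {};     // error: incomplete base class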
/// ActOnBaseSpecifier - Parsed a base specifier. A base specifier is
/// one entry in the base class list of a class specifier, for
/// example:
/// class foo : public bar, virtual private baz {
/// 'public bar' and 'virtual private baz' are each base-specifiers.
BaseResult
Sema::ActOnBaseSpecifier(Decl *classdecl, SourceRange SpecifierRange,
ParsedAttributes &Attributes,
bool Virtual, AccessSpecifier Access,
ParsedType basetype, SourceLocation BaseLoc,
SourceLocation EllipsisLoc) {
if (!classdecl)
return true;
AdjustDeclIfTemplate(classdecl);
CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(classdecl);
if (!Class)
return true;
// We haven't yet attached the base specifiers.
Class->setIsParsingBaseSpecifiers();
// We do not support any C++11 attributes on base-specifiers yet.
// Diagnose any attributes we see.
if (!Attributes.empty()) {
for (AttributeList *Attr = Attributes.getList(); Attr;
Attr = Attr->getNext()) {
if (Attr->isInvalid() ||
Attr->getKind() == AttributeList::IgnoredAttribute)
continue;
Diag(Attr->getLoc(),
Attr->getKind() == AttributeList::UnknownAttribute
? diag::warn_unknown_attribute_ignored
: diag::err_base_specifier_attribute)
<< Attr->getName();
}
}
TypeSourceInfo *TInfo = nullptr;
GetTypeFromParser(basetype, &TInfo);
if (EllipsisLoc.isInvalid() &&
DiagnoseUnexpandedParameterPack(SpecifierRange.getBegin(), TInfo,
UPPC_BaseType))
return true;
if (CXXBaseSpecifier *BaseSpec = CheckBaseSpecifier(Class, SpecifierRange,
Virtual, Access, TInfo,
EllipsisLoc))
return BaseSpec;
else
Class->setInvalidDecl();
return true;
}
/// Use a small set to collect indirect bases. As this is only used
/// locally, there's no need to abstract the small size parameter.
typedef llvm::SmallPtrSet<QualType, 4> IndirectBaseSet;
/// \brief Recursively add the bases of Type. Don't add Type itself.
static void
NoteIndirectBases(ASTContext &Context, IndirectBaseSet &Set,
const QualType &Type)
{
// Even though the incoming type is a base, it might not be
// a class -- it could be a template parameter, for instance.
if (auto Rec = Type->getAs<RecordType>()) {
auto Decl = Rec->getAsCXXRecordDecl();
// Iterate over its bases.
for (const auto &BaseSpec : Decl->bases()) {
QualType Base = Context.getCanonicalType(BaseSpec.getType())
.getUnqualifiedType();
if (Set.insert(Base).second)
// If we've not already seen it, recurse.
NoteIndirectBases(Context, Set, Base);
}
}
}
/// \brief Performs the actual work of attaching the given base class
/// specifiers to a C++ class.
bool Sema::AttachBaseSpecifiers(CXXRecordDecl *Class,
MutableArrayRef<CXXBaseSpecifier *> Bases) {
if (Bases.empty())
return false;
// Used to keep track of which base types we have already seen, so
// that we can properly diagnose redundant direct base types. Note
// that the key is always the unqualified canonical type of the base
// class.
std::map<QualType, CXXBaseSpecifier*, QualTypeOrdering> KnownBaseTypes;
// Used to track indirect bases so we can see if a direct base is
// ambiguous.
IndirectBaseSet IndirectBaseTypes;
// Copy non-redundant base specifiers into permanent storage.
unsigned NumGoodBases = 0;
bool Invalid = false;
for (unsigned idx = 0; idx < Bases.size(); ++idx) {
QualType NewBaseType
= Context.getCanonicalType(Bases[idx]->getType());
NewBaseType = NewBaseType.getLocalUnqualifiedType();
CXXBaseSpecifier *&KnownBase = KnownBaseTypes[NewBaseType];
if (KnownBase) {
// C++ [class.mi]p3:
// A class shall not be specified as a direct base class of a
// derived class more than once.
Diag(Bases[idx]->getLocStart(),
diag::err_duplicate_base_class)
<< KnownBase->getType()
<< Bases[idx]->getSourceRange();
// Delete the duplicate base class specifier; we're going to
// overwrite its pointer later.
Context.Deallocate(Bases[idx]);
Invalid = true;
} else {
// Okay, add this new base class.
KnownBase = Bases[idx];
Bases[NumGoodBases++] = Bases[idx];
// Note this base's direct & indirect bases, if there could be ambiguity.
if (Bases.size() > 1)
NoteIndirectBases(Context, IndirectBaseTypes, NewBaseType);
if (const RecordType *Record = NewBaseType->getAs<RecordType>()) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl());
if (Class->isInterface() &&
(!RD->isInterface() ||
KnownBase->getAccessSpecifier() != AS_public)) {
// The Microsoft extension __interface does not permit bases that
// are not themselves public interfaces.
Diag(KnownBase->getLocStart(), diag::err_invalid_base_in_interface)
<< getRecordDiagFromTagKind(RD->getTagKind()) << RD->getName()
<< RD->getSourceRange();
Invalid = true;
}
if (RD->hasAttr<WeakAttr>())
Class->addAttr(WeakAttr::CreateImplicit(Context));
}
}
}
// Attach the remaining base class specifiers to the derived class.
Class->setBases(Bases.data(), NumGoodBases);
for (unsigned idx = 0; idx < NumGoodBases; ++idx) {
// Check whether this direct base is inaccessible due to ambiguity.
QualType BaseType = Bases[idx]->getType();
CanQualType CanonicalBase = Context.getCanonicalType(BaseType)
.getUnqualifiedType();
if (IndirectBaseTypes.count(CanonicalBase)) {
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/true);
bool found
= Class->isDerivedFrom(CanonicalBase->getAsCXXRecordDecl(), Paths);
assert(found);
(void)found;
if (Paths.isAmbiguous(CanonicalBase))
Diag(Bases[idx]->getLocStart(), diag::warn_inaccessible_base_class)
<< BaseType << getAmbiguousPathsDisplayString(Paths)
<< Bases[idx]->getSourceRange();
else
assert(Bases[idx]->isVirtual());
}
// Delete the base class specifier, since its data has been copied
// into the CXXRecordDecl.
Context.Deallocate(Bases[idx]);
}
return Invalid;
}
/// ActOnBaseSpecifiers - Attach the given base specifiers to the
/// class, after checking whether there are any duplicate base
/// classes.
void Sema::ActOnBaseSpecifiers(Decl *ClassDecl,
MutableArrayRef<CXXBaseSpecifier *> Bases) {
if (!ClassDecl || Bases.empty())
return;
AdjustDeclIfTemplate(ClassDecl);
AttachBaseSpecifiers(cast<CXXRecordDecl>(ClassDecl), Bases);
}
/// \brief Determine whether the type \p Derived is a C++ class that is
/// derived from the type \p Base.
bool Sema::IsDerivedFrom(SourceLocation Loc, QualType Derived, QualType Base) {
if (!getLangOpts().CPlusPlus)
return false;
CXXRecordDecl *DerivedRD = Derived->getAsCXXRecordDecl();
if (!DerivedRD)
return false;
CXXRecordDecl *BaseRD = Base->getAsCXXRecordDecl();
if (!BaseRD)
return false;
// If either the base or the derived type is invalid, don't try to
// check whether one is derived from the other.
if (BaseRD->isInvalidDecl() || DerivedRD->isInvalidDecl())
return false;
// FIXME: In a modules build, do we need the entire path to be visible for us
// to be able to use the inheritance relationship?
if (!isCompleteType(Loc, Derived) && !DerivedRD->isBeingDefined())
return false;
return DerivedRD->isDerivedFrom(BaseRD);
}
/// \brief Determine whether the type \p Derived is a C++ class that is
/// derived from the type \p Base.
bool Sema::IsDerivedFrom(SourceLocation Loc, QualType Derived, QualType Base,
CXXBasePaths &Paths) {
if (!getLangOpts().CPlusPlus)
return false;
CXXRecordDecl *DerivedRD = Derived->getAsCXXRecordDecl();
if (!DerivedRD)
return false;
CXXRecordDecl *BaseRD = Base->getAsCXXRecordDecl();
if (!BaseRD)
return false;
if (!isCompleteType(Loc, Derived) && !DerivedRD->isBeingDefined())
return false;
return DerivedRD->isDerivedFrom(BaseRD, Paths);
}
void Sema::BuildBasePathArray(const CXXBasePaths &Paths,
CXXCastPath &BasePathArray) {
assert(BasePathArray.empty() && "Base path array must be empty!");
assert(Paths.isRecordingPaths() && "Must record paths!");
const CXXBasePath &Path = Paths.front();
// We first go backward and check if we have a virtual base.
// FIXME: It would be better if CXXBasePath had the base specifier for
// the nearest virtual base.
unsigned Start = 0;
for (unsigned I = Path.size(); I != 0; --I) {
if (Path[I - 1].Base->isVirtual()) {
Start = I - 1;
break;
}
}
// Now add all bases.
for (unsigned I = Start, E = Path.size(); I != E; ++I)
BasePathArray.push_back(const_cast<CXXBaseSpecifier*>(Path[I].Base));
}
/// CheckDerivedToBaseConversion - Check whether the Derived-to-Base
/// conversion (where Derived and Base are class types) is
/// well-formed, meaning that the conversion is unambiguous (and
/// that all of the base classes are accessible). Returns true
/// and emits a diagnostic if the code is ill-formed, returns false
/// otherwise. Loc is the location where this routine should point to
/// if there is an error, and Range is the source range to highlight
/// if there is an error.
///
/// If either InaccessibleBaseID or AmbigiousBaseConvID is zero, then the
/// diagnostic for the respective type of error will be suppressed, but the
/// check for ill-formed code will still be performed.
bool
Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base,
unsigned InaccessibleBaseID,
unsigned AmbigiousBaseConvID,
SourceLocation Loc, SourceRange Range,
DeclarationName Name,
CXXCastPath *BasePath,
bool IgnoreAccess) {
// First, determine whether the path from Derived to Base is
// ambiguous. This is slightly more expensive than checking whether
// the Derived to Base conversion exists, because here we need to
// explore multiple paths to determine if there is an ambiguity.
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
bool DerivationOkay = IsDerivedFrom(Loc, Derived, Base, Paths);
assert(DerivationOkay &&
"Can only be used with a derived-to-base conversion");
(void)DerivationOkay;
if (!Paths.isAmbiguous(Context.getCanonicalType(Base).getUnqualifiedType())) {
if (!IgnoreAccess) {
// Check that the base class can be accessed.
switch (CheckBaseClassAccess(Loc, Base, Derived, Paths.front(),
InaccessibleBaseID)) {
case AR_inaccessible:
return true;
case AR_accessible:
case AR_dependent:
case AR_delayed:
break;
}
}
// Build a base path if necessary.
if (BasePath)
BuildBasePathArray(Paths, *BasePath);
return false;
}
if (AmbigiousBaseConvID) {
// We know that the derived-to-base conversion is ambiguous, and
// we're going to produce a diagnostic. Perform the derived-to-base
// search just one more time to compute all of the possible paths so
// that we can print them out. This is more expensive than any of
// the previous derived-to-base checks we've done, but at this point
// performance isn't as much of an issue.
Paths.clear();
Paths.setRecordingPaths(true);
bool StillOkay = IsDerivedFrom(Loc, Derived, Base, Paths);
assert(StillOkay && "Can only be used with a derived-to-base conversion");
(void)StillOkay;
// Build up a textual representation of the ambiguous paths, e.g.,
// D -> B -> A, that will be used to illustrate the ambiguous
// conversions in the diagnostic. We only print one of the paths
// to each base class subobject.
std::string PathDisplayStr = getAmbiguousPathsDisplayString(Paths);
Diag(Loc, AmbigiousBaseConvID)
<< Derived << Base << PathDisplayStr << Range << Name;
}
return true;
}
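// Editor's note: an illustrative example (not part of the original source) of
// an ambiguous derived-to-base conversion diagnosed above:
//   struct A {};
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   A *p = new D;           // error: ambiguous; D -> B -> A and D -> C -> A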
bool
Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base,
SourceLocation Loc, SourceRange Range,
CXXCastPath *BasePath,
bool IgnoreAccess) {
return CheckDerivedToBaseConversion(
Derived, Base, diag::err_upcast_to_inaccessible_base,
diag::err_ambiguous_derived_to_base_conv, Loc, Range, DeclarationName(),
BasePath, IgnoreAccess);
}
/// @brief Builds a string representing ambiguous paths from a
/// specific derived class to different subobjects of the same base
/// class.
///
/// This function builds a string that can be used in error messages
/// to show the different paths that one can take through the
/// inheritance hierarchy to go from the derived class to different
/// subobjects of a base class. The result looks something like this:
/// @code
/// struct D -> struct B -> struct A
/// struct D -> struct C -> struct A
/// @endcode
std::string Sema::getAmbiguousPathsDisplayString(CXXBasePaths &Paths) {
std::string PathDisplayStr;
std::set<unsigned> DisplayedPaths;
for (CXXBasePaths::paths_iterator Path = Paths.begin();
Path != Paths.end(); ++Path) {
if (DisplayedPaths.insert(Path->back().SubobjectNumber).second) {
// We haven't displayed a path to this particular base
// class subobject yet.
PathDisplayStr += "\n ";
PathDisplayStr += Context.getTypeDeclType(Paths.getOrigin()).getAsString();
for (CXXBasePath::const_iterator Element = Path->begin();
Element != Path->end(); ++Element)
PathDisplayStr += " -> " + Element->Base->getType().getAsString();
}
}
return PathDisplayStr;
}
//===----------------------------------------------------------------------===//
// C++ class member Handling
//===----------------------------------------------------------------------===//
/// ActOnAccessSpecifier - Parsed an access specifier followed by a colon.
bool Sema::ActOnAccessSpecifier(AccessSpecifier Access,
SourceLocation ASLoc,
SourceLocation ColonLoc,
AttributeList *Attrs) {
assert(Access != AS_none && "Invalid kind for syntactic access specifier!");
AccessSpecDecl *ASDecl = AccessSpecDecl::Create(Context, Access, CurContext,
ASLoc, ColonLoc);
CurContext->addHiddenDecl(ASDecl);
return ProcessAccessDeclAttributeList(ASDecl, Attrs);
}
/// CheckOverrideControl - Check C++11 override control semantics.
void Sema::CheckOverrideControl(NamedDecl *D) {
if (D->isInvalidDecl())
return;
// We only care about "override" and "final" declarations.
if (!D->hasAttr<OverrideAttr>() && !D->hasAttr<FinalAttr>())
return;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D);
// We can't check dependent instance methods.
if (MD && MD->isInstance() &&
(MD->getParent()->hasAnyDependentBases() ||
MD->getType()->isDependentType()))
return;
if (MD && !MD->isVirtual()) {
// If we have a non-virtual method, check if it hides a virtual method.
// (In that case, it's most likely the method has the wrong type.)
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
FindHiddenVirtualMethods(MD, OverloadedMethods);
if (!OverloadedMethods.empty()) {
if (OverrideAttr *OA = D->getAttr<OverrideAttr>()) {
Diag(OA->getLocation(),
diag::override_keyword_hides_virtual_member_function)
<< "override" << (OverloadedMethods.size() > 1);
} else if (FinalAttr *FA = D->getAttr<FinalAttr>()) {
Diag(FA->getLocation(),
diag::override_keyword_hides_virtual_member_function)
<< (FA->isSpelledAsSealed() ? "sealed" : "final")
<< (OverloadedMethods.size() > 1);
}
NoteHiddenVirtualMethods(MD, OverloadedMethods);
MD->setInvalidDecl();
return;
}
// Fall through into the general case diagnostic.
// FIXME: We might want to attempt typo correction here.
}
if (!MD || !MD->isVirtual()) {
if (OverrideAttr *OA = D->getAttr<OverrideAttr>()) {
Diag(OA->getLocation(),
diag::override_keyword_only_allowed_on_virtual_member_functions)
<< "override" << FixItHint::CreateRemoval(OA->getLocation());
D->dropAttr<OverrideAttr>();
}
if (FinalAttr *FA = D->getAttr<FinalAttr>()) {
Diag(FA->getLocation(),
diag::override_keyword_only_allowed_on_virtual_member_functions)
<< (FA->isSpelledAsSealed() ? "sealed" : "final")
<< FixItHint::CreateRemoval(FA->getLocation());
D->dropAttr<FinalAttr>();
}
return;
}
// C++11 [class.virtual]p5:
// If a function is marked with the virt-specifier override and
// does not override a member function of a base class, the program is
// ill-formed.
bool HasOverriddenMethods =
MD->begin_overridden_methods() != MD->end_overridden_methods();
if (MD->hasAttr<OverrideAttr>() && !HasOverriddenMethods)
Diag(MD->getLocation(), diag::err_function_marked_override_not_overriding)
<< MD->getDeclName();
}
void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) {
if (D->isInvalidDecl() || D->hasAttr<OverrideAttr>())
return;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D);
if (!MD || MD->isImplicit() || MD->hasAttr<FinalAttr>())
return;
SourceLocation Loc = MD->getLocation();
SourceLocation SpellingLoc = Loc;
if (getSourceManager().isMacroArgExpansion(Loc))
SpellingLoc = getSourceManager().getImmediateExpansionRange(Loc).first;
SpellingLoc = getSourceManager().getSpellingLoc(SpellingLoc);
if (SpellingLoc.isValid() && getSourceManager().isInSystemHeader(SpellingLoc))
return;
if (MD->size_overridden_methods() > 0) {
unsigned DiagID = isa<CXXDestructorDecl>(MD)
? diag::warn_destructor_marked_not_override_overriding
: diag::warn_function_marked_not_override_overriding;
Diag(MD->getLocation(), DiagID) << MD->getDeclName();
const CXXMethodDecl *OMD = *MD->begin_overridden_methods();
Diag(OMD->getLocation(), diag::note_overridden_virtual_function);
}
}
/// CheckIfOverriddenFunctionIsMarkedFinal - Checks whether a virtual member
/// function overrides a virtual member function marked 'final', according to
/// C++11 [class.virtual]p4.
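/// For example (illustrative, names hypothetical):
/// @code
/// struct A { virtual void f() final; };
/// struct B : A { void f(); }; // error: declaration of 'f' overrides a
///                             // 'final' function
/// @endcode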
bool Sema::CheckIfOverriddenFunctionIsMarkedFinal(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
FinalAttr *FA = Old->getAttr<FinalAttr>();
if (!FA)
return false;
Diag(New->getLocation(), diag::err_final_function_overridden)
<< New->getDeclName()
<< FA->isSpelledAsSealed();
Diag(Old->getLocation(), diag::note_overridden_virtual_function);
return true;
}
static bool InitializationHasSideEffects(const FieldDecl &FD) {
const Type *T = FD.getType()->getBaseElementTypeUnsafe();
// FIXME: Destruction of ObjC lifetime types has side-effects.
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return !RD->isCompleteDefinition() ||
!RD->hasTrivialDefaultConstructor() ||
!RD->hasTrivialDestructor();
return false;
}
static AttributeList *getMSPropertyAttr(AttributeList *list) {
for (AttributeList *it = list; it != nullptr; it = it->getNext())
if (it->isDeclspecPropertyAttribute())
return it;
return nullptr;
}
// Check whether a field shadows a field inherited from a base class.
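// For example (illustrative, names hypothetical):
//   struct Base { int x; };
//   struct Derived : Base {
//     int x; // warning: 'x' shadows the field inherited from 'Base'
//   };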
void Sema::CheckShadowInheritedFields(const SourceLocation &Loc,
DeclarationName FieldName,
const CXXRecordDecl *RD) {
if (Diags.isIgnored(diag::warn_shadow_field, Loc))
return;
// Maps each base class to the shadowed field found in it.
std::map<CXXRecordDecl*, NamedDecl*> Bases;
auto FieldShadowed = [&](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) {
const auto Base = Specifier->getType()->getAsCXXRecordDecl();
// Record an ambiguous path directly
if (Bases.find(Base) != Bases.end())
return true;
for (const auto Field : Base->lookup(FieldName)) {
if ((isa<FieldDecl>(Field) || isa<IndirectFieldDecl>(Field)) &&
Field->getAccess() != AS_private) {
assert(Field->getAccess() != AS_none);
assert(Bases.find(Base) == Bases.end());
Bases[Base] = Field;
return true;
}
}
return false;
};
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/true);
if (!RD->lookupInBases(FieldShadowed, Paths))
return;
for (const auto &P : Paths) {
auto Base = P.back().Base->getType()->getAsCXXRecordDecl();
auto It = Bases.find(Base);
// Skip duplicated bases
if (It == Bases.end())
continue;
auto BaseField = It->second;
assert(BaseField->getAccess() != AS_private);
if (AS_none !=
CXXRecordDecl::MergeAccess(P.Access, BaseField->getAccess())) {
Diag(Loc, diag::warn_shadow_field)
<< FieldName.getAsString() << RD->getName() << Base->getName();
Diag(BaseField->getLocation(), diag::note_shadow_field);
Bases.erase(It);
}
}
}
/// ActOnCXXMemberDeclarator - This is invoked when a C++ class member
/// declarator is parsed. 'AS' is the access specifier, 'BW' specifies the
/// bitfield width if there is one, 'InitExpr' specifies the initializer if
/// one has been parsed, and 'InitStyle' is set if an in-class initializer is
/// present (but parsing it has been deferred).
NamedDecl *
Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D,
MultiTemplateParamsArg TemplateParameterLists,
Expr *BW, const VirtSpecifiers &VS,
InClassInitStyle InitStyle) {
const DeclSpec &DS = D.getDeclSpec();
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
SourceLocation Loc = NameInfo.getLoc();
// For anonymous bitfields, the location should point to the type.
if (Loc.isInvalid())
Loc = D.getLocStart();
Expr *BitWidth = static_cast<Expr*>(BW);
assert(isa<CXXRecordDecl>(CurContext));
assert(!DS.isFriendSpecified());
bool isFunc = D.isDeclarationOfFunction();
if (cast<CXXRecordDecl>(CurContext)->isInterface()) {
// The Microsoft extension __interface only permits public member functions
// and prohibits constructors, destructors, operators, non-public member
// functions, static methods and data members.
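// For example (an illustrative sketch of the extension):
//   __interface I {
//     void f();        // OK: public member function
//     static void g(); // error: static method in an __interface
//   };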
unsigned InvalidDecl;
bool ShowDeclName = true;
if (!isFunc)
InvalidDecl = (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) ? 0 : 1;
else if (AS != AS_public)
InvalidDecl = 2;
else if (DS.getStorageClassSpec() == DeclSpec::SCS_static)
InvalidDecl = 3;
else switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
InvalidDecl = 4;
ShowDeclName = false;
break;
case DeclarationName::CXXDestructorName:
InvalidDecl = 5;
ShowDeclName = false;
break;
case DeclarationName::CXXOperatorName:
case DeclarationName::CXXConversionFunctionName:
InvalidDecl = 6;
break;
default:
InvalidDecl = 0;
break;
}
if (InvalidDecl) {
if (ShowDeclName)
Diag(Loc, diag::err_invalid_member_in_interface)
<< (InvalidDecl-1) << Name;
else
Diag(Loc, diag::err_invalid_member_in_interface)
<< (InvalidDecl-1) << "";
return nullptr;
}
}
// C++ 9.2p6: A member shall not be declared to have automatic storage
// duration (auto, register) or with the extern storage-class-specifier.
// C++ 7.1.1p8: The mutable specifier can be applied only to names of class
// data members and cannot be applied to names declared const or static,
// and cannot be applied to reference members.
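// For example (illustrative):
//   struct S {
//     extern int a;     // error: invalid storage class for a class member
//     mutable void f(); // error: 'mutable' cannot be applied to functions
//   };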
switch (DS.getStorageClassSpec()) {
case DeclSpec::SCS_unspecified:
case DeclSpec::SCS_typedef:
case DeclSpec::SCS_static:
break;
case DeclSpec::SCS_mutable:
if (isFunc) {
Diag(DS.getStorageClassSpecLoc(), diag::err_mutable_function);
// FIXME: It would be nicer if the keyword were ignored only for this
// declarator. Otherwise we could get follow-up errors.
D.getMutableDeclSpec().ClearStorageClassSpecs();
}
break;
default:
Diag(DS.getStorageClassSpecLoc(),
diag::err_storageclass_invalid_for_member);
D.getMutableDeclSpec().ClearStorageClassSpecs();
break;
}
bool isInstField = ((DS.getStorageClassSpec() == DeclSpec::SCS_unspecified ||
DS.getStorageClassSpec() == DeclSpec::SCS_mutable) &&
!isFunc);
if (DS.isConstexprSpecified() && isInstField) {
SemaDiagnosticBuilder B =
Diag(DS.getConstexprSpecLoc(), diag::err_invalid_constexpr_member);
SourceLocation ConstexprLoc = DS.getConstexprSpecLoc();
if (InitStyle == ICIS_NoInit) {
B << 0 << 0;
if (D.getDeclSpec().getTypeQualifiers() & DeclSpec::TQ_const)
B << FixItHint::CreateRemoval(ConstexprLoc);
else {
B << FixItHint::CreateReplacement(ConstexprLoc, "const");
D.getMutableDeclSpec().ClearConstexprSpec();
const char *PrevSpec;
unsigned DiagID;
bool Failed = D.getMutableDeclSpec().SetTypeQual(
DeclSpec::TQ_const, ConstexprLoc, PrevSpec, DiagID, getLangOpts());
(void)Failed;
assert(!Failed && "Making a constexpr member const shouldn't fail");
}
} else {
B << 1;
const char *PrevSpec;
unsigned DiagID;
if (D.getMutableDeclSpec().SetStorageClassSpec(
*this, DeclSpec::SCS_static, ConstexprLoc, PrevSpec, DiagID,
Context.getPrintingPolicy())) {
assert(DS.getStorageClassSpec() == DeclSpec::SCS_mutable &&
"This is the only DeclSpec that should fail to be applied");
B << 1;
} else {
B << 0 << FixItHint::CreateInsertion(ConstexprLoc, "static ");
isInstField = false;
}
}
}
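// For example (illustrative): 'struct S { constexpr int x = 5; };' gets a
// fix-it suggesting 'static constexpr int x = 5;', while a constexpr member
// declared without an initializer gets a fix-it turning 'constexpr' into
// 'const'.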
NamedDecl *Member;
if (isInstField) {
CXXScopeSpec &SS = D.getCXXScopeSpec();
// Data members must have identifiers for names.
if (!Name.isIdentifier()) {
Diag(Loc, diag::err_bad_variable_name)
<< Name;
return nullptr;
}
IdentifierInfo *II = Name.getAsIdentifierInfo();
// A member field cannot be declared with the "template" keyword,
// so TemplateParameterLists should be empty in this case.
if (TemplateParameterLists.size()) {
TemplateParameterList* TemplateParams = TemplateParameterLists[0];
if (TemplateParams->size()) {
// There is no such thing as a member field template.
Diag(D.getIdentifierLoc(), diag::err_template_member)
<< II
<< SourceRange(TemplateParams->getTemplateLoc(),
TemplateParams->getRAngleLoc());
} else {
// There is an extraneous 'template<>' for this member.
Diag(TemplateParams->getTemplateLoc(),
diag::err_template_member_noparams)
<< II
<< SourceRange(TemplateParams->getTemplateLoc(),
TemplateParams->getRAngleLoc());
}
return nullptr;
}
if (SS.isSet() && !SS.isInvalid()) {
// The user provided a superfluous scope specifier inside a class
// definition:
//
// class X {
// int X::member;
// };
if (DeclContext *DC = computeDeclContext(SS, false))
diagnoseQualifiedDeclaration(SS, DC, Name, D.getIdentifierLoc());
else
Diag(D.getIdentifierLoc(), diag::err_member_qualification)
<< Name << SS.getRange();
SS.clear();
}
AttributeList *MSPropertyAttr =
getMSPropertyAttr(D.getDeclSpec().getAttributes().getList());
if (MSPropertyAttr) {
Member = HandleMSProperty(S, cast<CXXRecordDecl>(CurContext), Loc, D,
BitWidth, InitStyle, AS, MSPropertyAttr);
if (!Member)
return nullptr;
isInstField = false;
} else {
Member = HandleField(S, cast<CXXRecordDecl>(CurContext), Loc, D,
BitWidth, InitStyle, AS);
if (!Member)
return nullptr;
}
CheckShadowInheritedFields(Loc, Name, cast<CXXRecordDecl>(CurContext));
} else {
Member = HandleDeclarator(S, D, TemplateParameterLists);
if (!Member)
return nullptr;
// Non-instance fields can't have a bit-field.
if (BitWidth) {
if (Member->isInvalidDecl()) {
// don't emit another diagnostic.
} else if (isa<VarDecl>(Member) || isa<VarTemplateDecl>(Member)) {
// C++ 9.6p3: A bit-field shall not be a static member.
// "static member 'A' cannot be a bit-field"
Diag(Loc, diag::err_static_not_bitfield)
<< Name << BitWidth->getSourceRange();
} else if (isa<TypedefDecl>(Member)) {
// "typedef member 'x' cannot be a bit-field"
Diag(Loc, diag::err_typedef_not_bitfield)
<< Name << BitWidth->getSourceRange();
} else {
// A function typedef ("typedef int f(); f a;").
// C++ 9.6p3: A bit-field shall have integral or enumeration type.
Diag(Loc, diag::err_not_integral_type_bitfield)
<< Name << cast<ValueDecl>(Member)->getType()
<< BitWidth->getSourceRange();
}
BitWidth = nullptr;
Member->setInvalidDecl();
}
Member->setAccess(AS);
// If we have declared a member function template or static data member
// template, set the access of the templated declaration as well.
if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(Member))
FunTmpl->getTemplatedDecl()->setAccess(AS);
else if (VarTemplateDecl *VarTmpl = dyn_cast<VarTemplateDecl>(Member))
VarTmpl->getTemplatedDecl()->setAccess(AS);
}
if (VS.isOverrideSpecified())
Member->addAttr(new (Context) OverrideAttr(VS.getOverrideLoc(), Context, 0));
if (VS.isFinalSpecified())
Member->addAttr(new (Context) FinalAttr(VS.getFinalLoc(), Context,
VS.isFinalSpelledSealed()));
if (VS.getLastLocation().isValid()) {
// Update the end location of a method that has a virt-specifiers.
if (CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Member))
MD->setRangeEnd(VS.getLastLocation());
}
CheckOverrideControl(Member);
assert((Name || isInstField) && "No identifier for non-field ?");
if (isInstField) {
FieldDecl *FD = cast<FieldDecl>(Member);
FieldCollector->Add(FD);
if (!Diags.isIgnored(diag::warn_unused_private_field, FD->getLocation())) {
// Remember all explicit private FieldDecls that have a name, no side
// effects and are not part of a dependent type declaration.
if (!FD->isImplicit() && FD->getDeclName() &&
FD->getAccess() == AS_private &&
!FD->hasAttr<UnusedAttr>() &&
!FD->getParent()->isDependentContext() &&
!InitializationHasSideEffects(*FD))
UnusedPrivateFields.insert(FD);
}
}
return Member;
}
namespace {
class UninitializedFieldVisitor
: public EvaluatedExprVisitor<UninitializedFieldVisitor> {
Sema &S;
// List of Decls to generate a warning on. Decls are removed from the
// set as they become initialized.
llvm::SmallPtrSetImpl<ValueDecl*> &Decls;
// List of base classes of the record. Classes are removed once their
// initializers have been processed.
llvm::SmallPtrSetImpl<QualType> &BaseClasses;
// Vector of decls to be removed from the Decl set prior to visiting the
// nodes. These Decls may have been initialized in the prior initializer.
llvm::SmallVector<ValueDecl*, 4> DeclsToRemove;
// If non-null, add a note to the warning pointing back to the constructor.
const CXXConstructorDecl *Constructor;
// Variables to hold state when processing an initializer list. When
// InitList is true, special case initialization of FieldDecls matching
// InitListFieldDecl.
bool InitList;
FieldDecl *InitListFieldDecl;
llvm::SmallVector<unsigned, 4> InitFieldIndex;
public:
typedef EvaluatedExprVisitor<UninitializedFieldVisitor> Inherited;
UninitializedFieldVisitor(Sema &S,
llvm::SmallPtrSetImpl<ValueDecl*> &Decls,
llvm::SmallPtrSetImpl<QualType> &BaseClasses)
: Inherited(S.Context), S(S), Decls(Decls), BaseClasses(BaseClasses),
Constructor(nullptr), InitList(false), InitListFieldDecl(nullptr) {}
// Returns true if the use of ME is not an uninitialized use.
bool IsInitListMemberExprInitialized(MemberExpr *ME,
bool CheckReferenceOnly) {
llvm::SmallVector<FieldDecl*, 4> Fields;
bool ReferenceField = false;
while (ME) {
FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
if (!FD)
return false;
Fields.push_back(FD);
if (FD->getType()->isReferenceType())
ReferenceField = true;
ME = dyn_cast<MemberExpr>(ME->getBase()->IgnoreParenImpCasts());
}
// Binding a reference to an uninitialized field is not an
// uninitialized use.
if (CheckReferenceOnly && !ReferenceField)
return true;
llvm::SmallVector<unsigned, 4> UsedFieldIndex;
// Discard the first field since it is the field decl that is being
// initialized.
for (auto I = Fields.rbegin() + 1, E = Fields.rend(); I != E; ++I) {
UsedFieldIndex.push_back((*I)->getFieldIndex());
}
for (auto UsedIter = UsedFieldIndex.begin(),
UsedEnd = UsedFieldIndex.end(),
OrigIter = InitFieldIndex.begin(),
OrigEnd = InitFieldIndex.end();
UsedIter != UsedEnd && OrigIter != OrigEnd; ++UsedIter, ++OrigIter) {
if (*UsedIter < *OrigIter)
return true;
if (*UsedIter > *OrigIter)
break;
}
return false;
}
void HandleMemberExpr(MemberExpr *ME, bool CheckReferenceOnly,
bool AddressOf) {
if (isa<EnumConstantDecl>(ME->getMemberDecl()))
return;
// FieldME is the inner-most MemberExpr that is not an anonymous struct
// or union.
MemberExpr *FieldME = ME;
bool AllPODFields = FieldME->getType().isPODType(S.Context);
Expr *Base = ME;
while (MemberExpr *SubME =
dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
if (isa<VarDecl>(SubME->getMemberDecl()))
return;
if (FieldDecl *FD = dyn_cast<FieldDecl>(SubME->getMemberDecl()))
if (!FD->isAnonymousStructOrUnion())
FieldME = SubME;
if (!FieldME->getType().isPODType(S.Context))
AllPODFields = false;
Base = SubME->getBase();
}
if (!isa<CXXThisExpr>(Base->IgnoreParenImpCasts()))
return;
if (AddressOf && AllPODFields)
return;
ValueDecl* FoundVD = FieldME->getMemberDecl();
if (ImplicitCastExpr *BaseCast = dyn_cast<ImplicitCastExpr>(Base)) {
while (isa<ImplicitCastExpr>(BaseCast->getSubExpr())) {
BaseCast = cast<ImplicitCastExpr>(BaseCast->getSubExpr());
}
if (BaseCast->getCastKind() == CK_UncheckedDerivedToBase) {
QualType T = BaseCast->getType();
if (T->isPointerType() &&
BaseClasses.count(T->getPointeeType())) {
S.Diag(FieldME->getExprLoc(), diag::warn_base_class_is_uninit)
<< T->getPointeeType() << FoundVD;
}
}
}
if (!Decls.count(FoundVD))
return;
const bool IsReference = FoundVD->getType()->isReferenceType();
if (InitList && !AddressOf && FoundVD == InitListFieldDecl) {
// Special checking for initializer lists.
if (IsInitListMemberExprInitialized(ME, CheckReferenceOnly)) {
return;
}
} else {
// Prevent double warnings on use of unbound references.
if (CheckReferenceOnly && !IsReference)
return;
}
unsigned diag = IsReference
? diag::warn_reference_field_is_uninit
: diag::warn_field_is_uninit;
S.Diag(FieldME->getExprLoc(), diag) << FoundVD;
if (Constructor)
S.Diag(Constructor->getLocation(),
diag::note_uninit_in_this_constructor)
<< (Constructor->isDefaultConstructor() && Constructor->isImplicit());
}
void HandleValue(Expr *E, bool AddressOf) {
E = E->IgnoreParens();
if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
HandleMemberExpr(ME, false /*CheckReferenceOnly*/,
AddressOf /*AddressOf*/);
return;
}
if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
Visit(CO->getCond());
HandleValue(CO->getTrueExpr(), AddressOf);
HandleValue(CO->getFalseExpr(), AddressOf);
return;
}
if (BinaryConditionalOperator *BCO =
dyn_cast<BinaryConditionalOperator>(E)) {
Visit(BCO->getCond());
HandleValue(BCO->getFalseExpr(), AddressOf);
return;
}
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
HandleValue(OVE->getSourceExpr(), AddressOf);
return;
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
switch (BO->getOpcode()) {
default:
break;
case(BO_PtrMemD):
case(BO_PtrMemI):
HandleValue(BO->getLHS(), AddressOf);
Visit(BO->getRHS());
return;
case(BO_Comma):
Visit(BO->getLHS());
HandleValue(BO->getRHS(), AddressOf);
return;
}
}
Visit(E);
}
void CheckInitListExpr(InitListExpr *ILE) {
InitFieldIndex.push_back(0);
for (auto Child : ILE->children()) {
if (InitListExpr *SubList = dyn_cast<InitListExpr>(Child)) {
CheckInitListExpr(SubList);
} else {
Visit(Child);
}
++InitFieldIndex.back();
}
InitFieldIndex.pop_back();
}
void CheckInitializer(Expr *E, const CXXConstructorDecl *FieldConstructor,
FieldDecl *Field, const Type *BaseClass) {
// Remove Decls that may have been initialized in the previous
// initializer.
for (ValueDecl* VD : DeclsToRemove)
Decls.erase(VD);
DeclsToRemove.clear();
Constructor = FieldConstructor;
InitListExpr *ILE = dyn_cast<InitListExpr>(E);
if (ILE && Field) {
InitList = true;
InitListFieldDecl = Field;
InitFieldIndex.clear();
CheckInitListExpr(ILE);
} else {
InitList = false;
Visit(E);
}
if (Field)
Decls.erase(Field);
if (BaseClass)
BaseClasses.erase(BaseClass->getCanonicalTypeInternal());
}
void VisitMemberExpr(MemberExpr *ME) {
// All uses of unbound reference fields will warn.
HandleMemberExpr(ME, true /*CheckReferenceOnly*/, false /*AddressOf*/);
}
void VisitImplicitCastExpr(ImplicitCastExpr *E) {
if (E->getCastKind() == CK_LValueToRValue) {
HandleValue(E->getSubExpr(), false /*AddressOf*/);
return;
}
Inherited::VisitImplicitCastExpr(E);
}
void VisitCXXConstructExpr(CXXConstructExpr *E) {
if (E->getConstructor()->isCopyConstructor()) {
Expr *ArgExpr = E->getArg(0);
if (InitListExpr *ILE = dyn_cast<InitListExpr>(ArgExpr))
if (ILE->getNumInits() == 1)
ArgExpr = ILE->getInit(0);
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(ArgExpr))
if (ICE->getCastKind() == CK_NoOp)
ArgExpr = ICE->getSubExpr();
HandleValue(ArgExpr, false /*AddressOf*/);
return;
}
Inherited::VisitCXXConstructExpr(E);
}
void VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
Expr *Callee = E->getCallee();
if (isa<MemberExpr>(Callee)) {
HandleValue(Callee, false /*AddressOf*/);
for (auto Arg : E->arguments())
Visit(Arg);
return;
}
Inherited::VisitCXXMemberCallExpr(E);
}
void VisitCallExpr(CallExpr *E) {
// Treat std::move as a use.
if (E->getNumArgs() == 1) {
if (FunctionDecl *FD = E->getDirectCallee()) {
if (FD->isInStdNamespace() && FD->getIdentifier() &&
FD->getIdentifier()->isStr("move")) {
HandleValue(E->getArg(0), false /*AddressOf*/);
return;
}
}
}
Inherited::VisitCallExpr(E);
}
void VisitCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
Expr *Callee = E->getCallee();
if (isa<UnresolvedLookupExpr>(Callee))
return Inherited::VisitCXXOperatorCallExpr(E);
Visit(Callee);
for (auto Arg : E->arguments())
HandleValue(Arg->IgnoreParenImpCasts(), false /*AddressOf*/);
}
void VisitBinaryOperator(BinaryOperator *E) {
// If a field assignment is detected, remove the field from the
// uninitialized field set.
if (E->getOpcode() == BO_Assign)
if (MemberExpr *ME = dyn_cast<MemberExpr>(E->getLHS()))
if (FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
if (!FD->getType()->isReferenceType())
DeclsToRemove.push_back(FD);
if (E->isCompoundAssignmentOp()) {
HandleValue(E->getLHS(), false /*AddressOf*/);
Visit(E->getRHS());
return;
}
Inherited::VisitBinaryOperator(E);
}
void VisitUnaryOperator(UnaryOperator *E) {
if (E->isIncrementDecrementOp()) {
HandleValue(E->getSubExpr(), false /*AddressOf*/);
return;
}
if (E->getOpcode() == UO_AddrOf) {
if (MemberExpr *ME = dyn_cast<MemberExpr>(E->getSubExpr())) {
HandleValue(ME->getBase(), true /*AddressOf*/);
return;
}
}
Inherited::VisitUnaryOperator(E);
}
};
// Diagnose value-uses of fields to initialize themselves, e.g.
// foo(foo)
// where foo is not also a parameter to the constructor.
// Also diagnose cross-field uninitialized uses such as
// x(y), y(x)
// TODO: implement -Wuninitialized and fold this into that framework.
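// For example (illustrative, names hypothetical):
//   struct S {
//     int x, y;
//     S() : x(y), y(1) {} // warning: field 'y' is uninitialized when used
//   };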
static void DiagnoseUninitializedFields(
Sema &SemaRef, const CXXConstructorDecl *Constructor) {
if (SemaRef.getDiagnostics().isIgnored(diag::warn_field_is_uninit,
Constructor->getLocation())) {
return;
}
if (Constructor->isInvalidDecl())
return;
const CXXRecordDecl *RD = Constructor->getParent();
if (RD->getDescribedClassTemplate())
return;
// Holds fields that are uninitialized.
llvm::SmallPtrSet<ValueDecl*, 4> UninitializedFields;
// At the beginning, all fields are uninitialized.
for (auto *I : RD->decls()) {
if (auto *FD = dyn_cast<FieldDecl>(I)) {
UninitializedFields.insert(FD);
} else if (auto *IFD = dyn_cast<IndirectFieldDecl>(I)) {
UninitializedFields.insert(IFD->getAnonField());
}
}
llvm::SmallPtrSet<QualType, 4> UninitializedBaseClasses;
for (auto I : RD->bases())
UninitializedBaseClasses.insert(I.getType().getCanonicalType());
if (UninitializedFields.empty() && UninitializedBaseClasses.empty())
return;
UninitializedFieldVisitor UninitializedChecker(SemaRef,
UninitializedFields,
UninitializedBaseClasses);
for (const auto *FieldInit : Constructor->inits()) {
if (UninitializedFields.empty() && UninitializedBaseClasses.empty())
break;
Expr *InitExpr = FieldInit->getInit();
if (!InitExpr)
continue;
if (CXXDefaultInitExpr *Default =
dyn_cast<CXXDefaultInitExpr>(InitExpr)) {
InitExpr = Default->getExpr();
if (!InitExpr)
continue;
// In-class initializers will point to the constructor.
UninitializedChecker.CheckInitializer(InitExpr, Constructor,
FieldInit->getAnyMember(),
FieldInit->getBaseClass());
} else {
UninitializedChecker.CheckInitializer(InitExpr, nullptr,
FieldInit->getAnyMember(),
FieldInit->getBaseClass());
}
}
}
} // namespace
/// \brief Enter a new C++ default initializer scope. After calling this, the
/// caller must call \ref ActOnFinishCXXInClassMemberInitializer, even if
/// parsing or instantiating the initializer failed.
void Sema::ActOnStartCXXInClassMemberInitializer() {
// Create a synthetic function scope to represent the call to the constructor
// that notionally surrounds a use of this initializer.
PushFunctionScope();
}
/// \brief This is invoked after parsing an in-class initializer for a
/// non-static C++ class member, and after instantiating an in-class initializer
/// in a class template. Such actions are deferred until the class is complete.
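/// For example (illustrative): for 'struct S { int x = f(); };' this is
/// invoked with the initializer 'f()' once 'S' is complete.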
void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D,
SourceLocation InitLoc,
Expr *InitExpr) {
// Pop the notional constructor scope we created earlier.
PopFunctionScopeInfo(nullptr, D);
FieldDecl *FD = dyn_cast<FieldDecl>(D);
assert((isa<MSPropertyDecl>(D) || FD->getInClassInitStyle() != ICIS_NoInit) &&
"must set init style when field is created");
if (!InitExpr) {
D->setInvalidDecl();
if (FD)
FD->removeInClassInitializer();
return;
}
if (DiagnoseUnexpandedParameterPack(InitExpr, UPPC_Initializer)) {
FD->setInvalidDecl();
FD->removeInClassInitializer();
return;
}
ExprResult Init = InitExpr;
if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) {
InitializedEntity Entity = InitializedEntity::InitializeMember(FD);
InitializationKind Kind = FD->getInClassInitStyle() == ICIS_ListInit
? InitializationKind::CreateDirectList(InitExpr->getLocStart())
: InitializationKind::CreateCopy(InitExpr->getLocStart(), InitLoc);
InitializationSequence Seq(*this, Entity, Kind, InitExpr);
Init = Seq.Perform(*this, Entity, Kind, InitExpr);
if (Init.isInvalid()) {
FD->setInvalidDecl();
return;
}
}
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
Init = ActOnFinishFullExpr(Init.get(), InitLoc);
if (Init.isInvalid()) {
FD->setInvalidDecl();
return;
}
InitExpr = Init.get();
FD->setInClassInitializer(InitExpr);
}
/// \brief Find the direct and/or virtual base specifiers that
/// correspond to the given base type, for use in base initialization
/// within a constructor.
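/// For example (illustrative, names hypothetical):
/// @code
/// struct V {};
/// struct A : virtual V {};
/// struct B : A {
///   B() : V() {} // 'V' is found as a virtual base of 'B'
/// };
/// @endcode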
static bool FindBaseInitializer(Sema &SemaRef,
CXXRecordDecl *ClassDecl,
QualType BaseType,
const CXXBaseSpecifier *&DirectBaseSpec,
const CXXBaseSpecifier *&VirtualBaseSpec) {
// First, check for a direct base class.
DirectBaseSpec = nullptr;
for (const auto &Base : ClassDecl->bases()) {
if (SemaRef.Context.hasSameUnqualifiedType(BaseType, Base.getType())) {
// We found a direct base of this type. That's what we're
// initializing.
DirectBaseSpec = &Base;
break;
}
}
// Check for a virtual base class.
// FIXME: We might be able to short-circuit this if we know in advance that
// there are no virtual bases.
VirtualBaseSpec = nullptr;
if (!DirectBaseSpec || !DirectBaseSpec->isVirtual()) {
// We haven't found a base yet; search the class hierarchy for a
// virtual base class.
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
if (SemaRef.IsDerivedFrom(ClassDecl->getLocation(),
SemaRef.Context.getTypeDeclType(ClassDecl),
BaseType, Paths)) {
for (CXXBasePaths::paths_iterator Path = Paths.begin();
Path != Paths.end(); ++Path) {
if (Path->back().Base->isVirtual()) {
VirtualBaseSpec = Path->back().Base;
break;
}
}
}
}
return DirectBaseSpec || VirtualBaseSpec;
}
/// \brief Handle a C++ member initializer using braced-init-list syntax.
MemInitResult
Sema::ActOnMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
Expr *InitList,
SourceLocation EllipsisLoc) {
return BuildMemInitializer(ConstructorD, S, SS, MemberOrBase, TemplateTypeTy,
DS, IdLoc, InitList,
EllipsisLoc);
}
/// \brief Handle a C++ member initializer using parentheses syntax.
MemInitResult
Sema::ActOnMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
SourceLocation LParenLoc,
ArrayRef<Expr *> Args,
SourceLocation RParenLoc,
SourceLocation EllipsisLoc) {
Expr *List = new (Context) ParenListExpr(Context, LParenLoc,
Args, RParenLoc);
return BuildMemInitializer(ConstructorD, S, SS, MemberOrBase, TemplateTypeTy,
DS, IdLoc, List, EllipsisLoc);
}
namespace {
// Callback to only accept typo corrections that can be a valid C++ member
// initializer: either a non-static data member or a base class.
class MemInitializerValidatorCCC : public CorrectionCandidateCallback {
public:
explicit MemInitializerValidatorCCC(CXXRecordDecl *ClassDecl)
: ClassDecl(ClassDecl) {}
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (NamedDecl *ND = candidate.getCorrectionDecl()) {
if (FieldDecl *Member = dyn_cast<FieldDecl>(ND))
return Member->getDeclContext()->getRedeclContext()->Equals(ClassDecl);
return isa<TypeDecl>(ND);
}
return false;
}
private:
CXXRecordDecl *ClassDecl;
};
}
/// \brief Handle a C++ member initializer.
MemInitResult
Sema::BuildMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
Expr *Init,
SourceLocation EllipsisLoc) {
ExprResult Res = CorrectDelayedTyposInExpr(Init);
if (!Res.isUsable())
return true;
Init = Res.get();
if (!ConstructorD)
return true;
AdjustDeclIfTemplate(ConstructorD);
CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(ConstructorD);
if (!Constructor) {
// The user wrote a constructor initializer on a function that is
// not a C++ constructor. Ignore the error for now, because we may
// have more member initializers coming; we'll diagnose it just
// once in ActOnMemInitializers.
return true;
}
CXXRecordDecl *ClassDecl = Constructor->getParent();
// C++ [class.base.init]p2:
// Names in a mem-initializer-id are looked up in the scope of the
// constructor's class and, if not found in that scope, are looked
// up in the scope containing the constructor's definition.
// [Note: if the constructor's class contains a member with the
// same name as a direct or virtual base class of the class, a
// mem-initializer-id naming the member or base class and composed
// of a single identifier refers to the class member. A
// mem-initializer-id for the hidden base class may be specified
// using a qualified name. ]
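// For example (illustrative, names hypothetical):
//   struct B {};
//   struct D : B {
//     int B;
//     D() : B(0) {}     // initializes the member 'B', not the base class
//     D(int) : ::B() {} // a qualified name refers to the hidden base class
//   };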
if (!SS.getScopeRep() && !TemplateTypeTy) {
// Look for a member, first.
DeclContext::lookup_result Result = ClassDecl->lookup(MemberOrBase);
if (!Result.empty()) {
ValueDecl *Member;
if ((Member = dyn_cast<FieldDecl>(Result.front())) ||
(Member = dyn_cast<IndirectFieldDecl>(Result.front()))) {
if (EllipsisLoc.isValid())
Diag(EllipsisLoc, diag::err_pack_expansion_member_init)
<< MemberOrBase
<< SourceRange(IdLoc, Init->getSourceRange().getEnd());
return BuildMemberInitializer(Member, Init, IdLoc);
}
}
}
// It didn't name a member, so see if it names a class.
QualType BaseType;
TypeSourceInfo *TInfo = nullptr;
if (TemplateTypeTy) {
BaseType = GetTypeFromParser(TemplateTypeTy, &TInfo);
} else if (DS.getTypeSpecType() == TST_decltype) {
BaseType = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
} else if (DS.getTypeSpecType() == TST_decltype_auto) {
Diag(DS.getTypeSpecTypeLoc(), diag::err_decltype_auto_invalid);
return true;
} else {
LookupResult R(*this, MemberOrBase, IdLoc, LookupOrdinaryName);
LookupParsedName(R, S, &SS);
TypeDecl *TyD = R.getAsSingle<TypeDecl>();
if (!TyD) {
if (R.isAmbiguous()) return true;
// We don't want access-control diagnostics here.
R.suppressDiagnostics();
if (SS.isSet() && isDependentScopeSpecifier(SS)) {
bool NotUnknownSpecialization = false;
DeclContext *DC = computeDeclContext(SS, false);
if (CXXRecordDecl *Record = dyn_cast_or_null<CXXRecordDecl>(DC))
NotUnknownSpecialization = !Record->hasAnyDependentBases();
if (!NotUnknownSpecialization) {
// When the scope specifier can refer to a member of an unknown
// specialization, we take it as a type name.
BaseType = CheckTypenameType(ETK_None, SourceLocation(),
SS.getWithLocInContext(Context),
*MemberOrBase, IdLoc);
if (BaseType.isNull())
return true;
TInfo = Context.CreateTypeSourceInfo(BaseType);
DependentNameTypeLoc TL =
TInfo->getTypeLoc().castAs<DependentNameTypeLoc>();
if (!TL.isNull()) {
TL.setNameLoc(IdLoc);
TL.setElaboratedKeywordLoc(SourceLocation());
TL.setQualifierLoc(SS.getWithLocInContext(Context));
}
R.clear();
R.setLookupName(MemberOrBase);
}
}
// If no results were found, try to correct typos.
TypoCorrection Corr;
if (R.empty() && BaseType.isNull() &&
(Corr = CorrectTypo(
R.getLookupNameInfo(), R.getLookupKind(), S, &SS,
llvm::make_unique<MemInitializerValidatorCCC>(ClassDecl),
CTK_ErrorRecovery, ClassDecl))) {
if (FieldDecl *Member = Corr.getCorrectionDeclAs<FieldDecl>()) {
// We have found a non-static data member with a similar
// name to what was typed; complain and initialize that
// member.
diagnoseTypo(Corr,
PDiag(diag::err_mem_init_not_member_or_class_suggest)
<< MemberOrBase << true);
return BuildMemberInitializer(Member, Init, IdLoc);
} else if (TypeDecl *Type = Corr.getCorrectionDeclAs<TypeDecl>()) {
const CXXBaseSpecifier *DirectBaseSpec;
const CXXBaseSpecifier *VirtualBaseSpec;
if (FindBaseInitializer(*this, ClassDecl,
Context.getTypeDeclType(Type),
DirectBaseSpec, VirtualBaseSpec)) {
// We have found a direct or virtual base class with a
// similar name to what was typed; complain and initialize
// that base class.
diagnoseTypo(Corr,
PDiag(diag::err_mem_init_not_member_or_class_suggest)
<< MemberOrBase << false,
PDiag() /*Suppress note, we provide our own.*/);
const CXXBaseSpecifier *BaseSpec = DirectBaseSpec ? DirectBaseSpec
: VirtualBaseSpec;
Diag(BaseSpec->getLocStart(),
diag::note_base_class_specified_here)
<< BaseSpec->getType()
<< BaseSpec->getSourceRange();
TyD = Type;
}
}
}
if (!TyD && BaseType.isNull()) {
Diag(IdLoc, diag::err_mem_init_not_member_or_class)
<< MemberOrBase << SourceRange(IdLoc,Init->getSourceRange().getEnd());
return true;
}
}
if (BaseType.isNull()) {
BaseType = Context.getTypeDeclType(TyD);
MarkAnyDeclReferenced(TyD->getLocation(), TyD, /*OdrUse=*/false);
if (SS.isSet()) {
BaseType = Context.getElaboratedType(ETK_None, SS.getScopeRep(),
BaseType);
TInfo = Context.CreateTypeSourceInfo(BaseType);
ElaboratedTypeLoc TL = TInfo->getTypeLoc().castAs<ElaboratedTypeLoc>();
TL.getNamedTypeLoc().castAs<TypeSpecTypeLoc>().setNameLoc(IdLoc);
TL.setElaboratedKeywordLoc(SourceLocation());
TL.setQualifierLoc(SS.getWithLocInContext(Context));
}
}
}
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(BaseType, IdLoc);
return BuildBaseInitializer(BaseType, TInfo, Init, ClassDecl, EllipsisLoc);
}
/// Checks a member initializer expression for cases where reference (or
/// pointer) members are bound to by-value parameters (or their addresses).
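/// For example (illustrative, names hypothetical):
/// @code
/// struct S {
///   const int &r;
///   S(int v) : r(v) {} // warning: binding reference member 'r' to a
///                      // stack-allocated parameter 'v'
/// };
/// @endcode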
static void CheckForDanglingReferenceOrPointer(Sema &S, ValueDecl *Member,
Expr *Init,
SourceLocation IdLoc) {
QualType MemberTy = Member->getType();
// We only handle pointers and references currently.
// FIXME: Would this be relevant for ObjC object pointers? Or block pointers?
if (!MemberTy->isReferenceType() && !MemberTy->isPointerType())
return;
const bool IsPointer = MemberTy->isPointerType();
if (IsPointer) {
if (const UnaryOperator *Op
= dyn_cast<UnaryOperator>(Init->IgnoreParenImpCasts())) {
// The only case we're worried about with pointers requires taking the
// address.
if (Op->getOpcode() != UO_AddrOf)
return;
Init = Op->getSubExpr();
} else {
// We only handle address-of expression initializers for pointers.
return;
}
}
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Init->IgnoreParens())) {
// We only warn when referring to a non-reference parameter declaration.
const ParmVarDecl *Parameter = dyn_cast<ParmVarDecl>(DRE->getDecl());
if (!Parameter || Parameter->getType()->isReferenceType())
return;
S.Diag(Init->getExprLoc(),
IsPointer ? diag::warn_init_ptr_member_to_parameter_addr
: diag::warn_bind_ref_member_to_parameter)
<< Member << Parameter << Init->getSourceRange();
} else {
// Other initializers are fine.
return;
}
S.Diag(Member->getLocation(), diag::note_ref_or_ptr_member_declared_here)
<< (unsigned)IsPointer;
}
MemInitResult
Sema::BuildMemberInitializer(ValueDecl *Member, Expr *Init,
SourceLocation IdLoc) {
FieldDecl *DirectMember = dyn_cast<FieldDecl>(Member);
IndirectFieldDecl *IndirectMember = dyn_cast<IndirectFieldDecl>(Member);
assert((DirectMember || IndirectMember) &&
"Member must be a FieldDecl or IndirectFieldDecl");
if (DiagnoseUnexpandedParameterPack(Init, UPPC_Initializer))
return true;
if (Member->isInvalidDecl())
return true;
MultiExprArg Args;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
} else if (InitListExpr *InitList = dyn_cast<InitListExpr>(Init)) {
Args = MultiExprArg(InitList->getInits(), InitList->getNumInits());
} else {
// Template instantiation doesn't reconstruct ParenListExprs for us.
Args = Init;
}
SourceRange InitRange = Init->getSourceRange();
if (Member->getType()->isDependentType() || Init->isTypeDependent()) {
// Can't check initialization for a member of dependent type or when
// any of the arguments are type-dependent expressions.
DiscardCleanupsInEvaluationContext();
} else {
bool InitList = false;
if (isa<InitListExpr>(Init)) {
InitList = true;
Args = Init;
}
// Initialize the member.
InitializedEntity MemberEntity =
DirectMember ? InitializedEntity::InitializeMember(DirectMember, nullptr)
: InitializedEntity::InitializeMember(IndirectMember,
nullptr);
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(IdLoc)
: InitializationKind::CreateDirect(IdLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, MemberEntity, Kind, Args);
ExprResult MemberInit = InitSeq.Perform(*this, MemberEntity, Kind, Args,
nullptr);
if (MemberInit.isInvalid())
return true;
CheckForDanglingReferenceOrPointer(*this, Member, MemberInit.get(), IdLoc);
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
MemberInit = ActOnFinishFullExpr(MemberInit.get(), InitRange.getBegin());
if (MemberInit.isInvalid())
return true;
Init = MemberInit.get();
}
if (DirectMember) {
return new (Context) CXXCtorInitializer(Context, DirectMember, IdLoc,
InitRange.getBegin(), Init,
InitRange.getEnd());
} else {
return new (Context) CXXCtorInitializer(Context, IndirectMember, IdLoc,
InitRange.getBegin(), Init,
InitRange.getEnd());
}
}
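// A delegating constructor, the case BuildDelegatingInitializer handles
// (illustrative, names hypothetical):
//   struct S {
//     S(int);
//     S() : S(42) {} // delegates to S(int); C++11 and later only
//   };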
MemInitResult
Sema::BuildDelegatingInitializer(TypeSourceInfo *TInfo, Expr *Init,
CXXRecordDecl *ClassDecl) {
SourceLocation NameLoc = TInfo->getTypeLoc().getLocalSourceRange().getBegin();
if (!LangOpts.CPlusPlus11)
return Diag(NameLoc, diag::err_delegating_ctor)
<< TInfo->getTypeLoc().getLocalSourceRange();
Diag(NameLoc, diag::warn_cxx98_compat_delegating_ctor);
bool InitList = true;
MultiExprArg Args = Init;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
InitList = false;
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
}
SourceRange InitRange = Init->getSourceRange();
// Initialize the object.
InitializedEntity DelegationEntity = InitializedEntity::InitializeDelegation(
QualType(ClassDecl->getTypeForDecl(), 0));
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(NameLoc)
: InitializationKind::CreateDirect(NameLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, DelegationEntity, Kind, Args);
ExprResult DelegationInit = InitSeq.Perform(*this, DelegationEntity, Kind,
Args, nullptr);
if (DelegationInit.isInvalid())
return true;
assert(cast<CXXConstructExpr>(DelegationInit.get())->getConstructor() &&
"Delegating constructor with no target?");
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
DelegationInit = ActOnFinishFullExpr(DelegationInit.get(),
InitRange.getBegin());
if (DelegationInit.isInvalid())
return true;
// If we are in a dependent context, template instantiation will
// perform this type-checking again. Just save the arguments that we
// received in a ParenListExpr.
// FIXME: This isn't quite ideal, since our ASTs don't capture all
// of the information that we have about the base
// initializer. However, deconstructing the ASTs is a dicey process,
// and this approach is far more likely to get the corner cases right.
if (CurContext->isDependentContext())
DelegationInit = Init;
return new (Context) CXXCtorInitializer(Context, TInfo, InitRange.getBegin(),
DelegationInit.getAs<Expr>(),
InitRange.getEnd());
}
MemInitResult
Sema::BuildBaseInitializer(QualType BaseType, TypeSourceInfo *BaseTInfo,
Expr *Init, CXXRecordDecl *ClassDecl,
SourceLocation EllipsisLoc) {
SourceLocation BaseLoc
= BaseTInfo->getTypeLoc().getLocalSourceRange().getBegin();
if (!BaseType->isDependentType() && !BaseType->isRecordType())
return Diag(BaseLoc, diag::err_base_init_does_not_name_class)
<< BaseType << BaseTInfo->getTypeLoc().getLocalSourceRange();
// C++ [class.base.init]p2:
// [...] Unless the mem-initializer-id names a nonstatic data
// member of the constructor's class or a direct or virtual base
// of that class, the mem-initializer is ill-formed. A
// mem-initializer-list can initialize a base class using any
// name that denotes that base class type.
bool Dependent = BaseType->isDependentType() || Init->isTypeDependent();
SourceRange InitRange = Init->getSourceRange();
if (EllipsisLoc.isValid()) {
// This is a pack expansion.
if (!BaseType->containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< SourceRange(BaseLoc, InitRange.getEnd());
EllipsisLoc = SourceLocation();
}
} else {
// Check for any unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(BaseLoc, BaseTInfo, UPPC_Initializer))
return true;
if (DiagnoseUnexpandedParameterPack(Init, UPPC_Initializer))
return true;
}
// Check for direct and virtual base classes.
const CXXBaseSpecifier *DirectBaseSpec = nullptr;
const CXXBaseSpecifier *VirtualBaseSpec = nullptr;
if (!Dependent) {
if (Context.hasSameUnqualifiedType(QualType(ClassDecl->getTypeForDecl(),0),
BaseType))
return BuildDelegatingInitializer(BaseTInfo, Init, ClassDecl);
FindBaseInitializer(*this, ClassDecl, BaseType, DirectBaseSpec,
VirtualBaseSpec);
// C++ [base.class.init]p2:
// Unless the mem-initializer-id names a nonstatic data member of the
// constructor's class or a direct or virtual base of that class, the
// mem-initializer is ill-formed.
if (!DirectBaseSpec && !VirtualBaseSpec) {
// If the class has any dependent bases, then it's possible that
// one of those types will resolve to the same type as
// BaseType. Therefore, just treat this as a dependent base
// class initialization. FIXME: Should we try to check the
// initialization anyway? It seems odd.
if (ClassDecl->hasAnyDependentBases())
Dependent = true;
else
return Diag(BaseLoc, diag::err_not_direct_base_or_virtual)
<< BaseType << Context.getTypeDeclType(ClassDecl)
<< BaseTInfo->getTypeLoc().getLocalSourceRange();
}
}
if (Dependent) {
DiscardCleanupsInEvaluationContext();
return new (Context) CXXCtorInitializer(Context, BaseTInfo,
/*IsVirtual=*/false,
InitRange.getBegin(), Init,
InitRange.getEnd(), EllipsisLoc);
}
// C++ [base.class.init]p2:
// If a mem-initializer-id is ambiguous because it designates both
// a direct non-virtual base class and an inherited virtual base
// class, the mem-initializer is ill-formed.
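// For example (illustrative, names hypothetical):
//   struct V {};
//   struct A : virtual V {};
//   struct B : A, V {
//     B() : V() {} // error: 'V' is both a direct base and an inherited
//                  // virtual base
//   };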
if (DirectBaseSpec && VirtualBaseSpec)
return Diag(BaseLoc, diag::err_base_init_direct_and_virtual)
<< BaseType << BaseTInfo->getTypeLoc().getLocalSourceRange();
const CXXBaseSpecifier *BaseSpec = DirectBaseSpec;
if (!BaseSpec)
BaseSpec = VirtualBaseSpec;
// Initialize the base.
bool InitList = true;
MultiExprArg Args = Init;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
InitList = false;
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
}
InitializedEntity BaseEntity =
InitializedEntity::InitializeBase(Context, BaseSpec, VirtualBaseSpec);
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(BaseLoc)
: InitializationKind::CreateDirect(BaseLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, BaseEntity, Kind, Args);
ExprResult BaseInit = InitSeq.Perform(*this, BaseEntity, Kind, Args, nullptr);
if (BaseInit.isInvalid())
return true;
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
BaseInit = ActOnFinishFullExpr(BaseInit.get(), InitRange.getBegin());
if (BaseInit.isInvalid())
return true;
// If we are in a dependent context, template instantiation will
// perform this type-checking again. Just save the arguments that we
// received in a ParenListExpr.
// FIXME: This isn't quite ideal, since our ASTs don't capture all
// of the information that we have about the base
// initializer. However, deconstructing the ASTs is a dicey process,
// and this approach is far more likely to get the corner cases right.
if (CurContext->isDependentContext())
BaseInit = Init;
return new (Context) CXXCtorInitializer(Context, BaseTInfo,
BaseSpec->isVirtual(),
InitRange.getBegin(),
BaseInit.getAs<Expr>(),
InitRange.getEnd(), EllipsisLoc);
}
// Create a static_cast<T&&>(expr).
static Expr *CastForMoving(Sema &SemaRef, Expr *E, QualType T = QualType()) {
if (T.isNull()) T = E->getType();
QualType TargetType = SemaRef.BuildReferenceType(
T, /*SpelledAsLValue*/false, SourceLocation(), DeclarationName());
SourceLocation ExprLoc = E->getLocStart();
TypeSourceInfo *TargetLoc = SemaRef.Context.getTrivialTypeSourceInfo(
TargetType, ExprLoc);
return SemaRef.BuildCXXNamedCast(ExprLoc, tok::kw_static_cast, TargetLoc, E,
SourceRange(ExprLoc, ExprLoc),
E->getSourceRange()).get();
}
/// ImplicitInitializerKind - How an implicit base or member initializer should
/// initialize its base or member.
enum ImplicitInitializerKind {
IIK_Default,
IIK_Copy,
IIK_Move,
IIK_Inherit
};
static bool
BuildImplicitBaseInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor,
ImplicitInitializerKind ImplicitInitKind,
CXXBaseSpecifier *BaseSpec,
bool IsInheritedVirtualBase,
CXXCtorInitializer *&CXXBaseInit) {
InitializedEntity InitEntity
= InitializedEntity::InitializeBase(SemaRef.Context, BaseSpec,
IsInheritedVirtualBase);
ExprResult BaseInit;
switch (ImplicitInitKind) {
case IIK_Inherit:
case IIK_Default: {
InitializationKind InitKind
= InitializationKind::CreateDefault(Constructor->getLocation());
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None);
BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, None);
break;
}
case IIK_Move:
case IIK_Copy: {
bool Moving = ImplicitInitKind == IIK_Move;
ParmVarDecl *Param = Constructor->getParamDecl(0);
QualType ParamType = Param->getType().getNonReferenceType();
Expr *CopyCtorArg =
DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
SourceLocation(), Param, false,
Constructor->getLocation(), ParamType,
VK_LValue, nullptr);
SemaRef.MarkDeclRefReferenced(cast<DeclRefExpr>(CopyCtorArg));
// Cast to the base class to avoid ambiguities.
QualType ArgTy =
SemaRef.Context.getQualifiedType(BaseSpec->getType().getUnqualifiedType(),
ParamType.getQualifiers());
if (Moving) {
CopyCtorArg = CastForMoving(SemaRef, CopyCtorArg);
}
CXXCastPath BasePath;
BasePath.push_back(BaseSpec);
CopyCtorArg = SemaRef.ImpCastExprToType(CopyCtorArg, ArgTy,
CK_UncheckedDerivedToBase,
Moving ? VK_XValue : VK_LValue,
&BasePath).get();
InitializationKind InitKind
= InitializationKind::CreateDirect(Constructor->getLocation(),
SourceLocation(), SourceLocation());
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, CopyCtorArg);
BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, CopyCtorArg);
break;
}
}
BaseInit = SemaRef.MaybeCreateExprWithCleanups(BaseInit);
if (BaseInit.isInvalid())
return true;
CXXBaseInit =
new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
SemaRef.Context.getTrivialTypeSourceInfo(BaseSpec->getType(),
SourceLocation()),
BaseSpec->isVirtual(),
SourceLocation(),
BaseInit.getAs<Expr>(),
SourceLocation(),
SourceLocation());
return false;
}
static bool RefersToRValueRef(Expr *MemRef) {
ValueDecl *Referenced = cast<MemberExpr>(MemRef)->getMemberDecl();
return Referenced->getType()->isRValueReferenceType();
}
static bool
BuildImplicitMemberInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor,
ImplicitInitializerKind ImplicitInitKind,
FieldDecl *Field, IndirectFieldDecl *Indirect,
CXXCtorInitializer *&CXXMemberInit) {
if (Field->isInvalidDecl())
return true;
SourceLocation Loc = Constructor->getLocation();
if (ImplicitInitKind == IIK_Copy || ImplicitInitKind == IIK_Move) {
bool Moving = ImplicitInitKind == IIK_Move;
ParmVarDecl *Param = Constructor->getParamDecl(0);
QualType ParamType = Param->getType().getNonReferenceType();
// Suppress copying zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(SemaRef.Context) == 0)
return false;
Expr *MemberExprBase =
DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
SourceLocation(), Param, false,
Loc, ParamType, VK_LValue, nullptr);
SemaRef.MarkDeclRefReferenced(cast<DeclRefExpr>(MemberExprBase));
if (Moving) {
MemberExprBase = CastForMoving(SemaRef, MemberExprBase);
}
// Build a reference to this field within the parameter.
CXXScopeSpec SS;
LookupResult MemberLookup(SemaRef, Field->getDeclName(), Loc,
Sema::LookupMemberName);
MemberLookup.addDecl(Indirect ? cast<ValueDecl>(Indirect)
: cast<ValueDecl>(Field), AS_public);
MemberLookup.resolveKind();
ExprResult CtorArg
= SemaRef.BuildMemberReferenceExpr(MemberExprBase,
ParamType, Loc,
/*IsArrow=*/false,
SS,
/*TemplateKWLoc=*/SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
MemberLookup,
/*TemplateArgs=*/nullptr,
/*S*/nullptr);
if (CtorArg.isInvalid())
return true;
// C++11 [class.copy]p15:
// - if a member m has rvalue reference type T&&, it is direct-initialized
// with static_cast<T&&>(x.m);
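// For example (illustrative): for 'struct S { int &&m; };' the implicit move
// constructor initializes 'm' with 'static_cast<int&&>(x.m)'.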
if (RefersToRValueRef(CtorArg.get())) {
CtorArg = CastForMoving(SemaRef, CtorArg.get());
}
InitializedEntity Entity =
Indirect ? InitializedEntity::InitializeMember(Indirect, nullptr,
/*Implicit*/ true)
: InitializedEntity::InitializeMember(Field, nullptr,
/*Implicit*/ true);
// Direct-initialize to use the copy constructor.
InitializationKind InitKind =
InitializationKind::CreateDirect(Loc, SourceLocation(), SourceLocation());
Expr *CtorArgE = CtorArg.getAs<Expr>();
InitializationSequence InitSeq(SemaRef, Entity, InitKind, CtorArgE);
ExprResult MemberInit =
InitSeq.Perform(SemaRef, Entity, InitKind, MultiExprArg(&CtorArgE, 1));
MemberInit = SemaRef.MaybeCreateExprWithCleanups(MemberInit);
if (MemberInit.isInvalid())
return true;
if (Indirect)
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(
SemaRef.Context, Indirect, Loc, Loc, MemberInit.getAs<Expr>(), Loc);
else
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(
SemaRef.Context, Field, Loc, Loc, MemberInit.getAs<Expr>(), Loc);
return false;
}
assert((ImplicitInitKind == IIK_Default || ImplicitInitKind == IIK_Inherit) &&
"Unhandled implicit init kind!");
QualType FieldBaseElementType =
SemaRef.Context.getBaseElementType(Field->getType());
if (FieldBaseElementType->isRecordType()) {
InitializedEntity InitEntity =
Indirect ? InitializedEntity::InitializeMember(Indirect, nullptr,
/*Implicit*/ true)
: InitializedEntity::InitializeMember(Field, nullptr,
/*Implicit*/ true);
InitializationKind InitKind =
InitializationKind::CreateDefault(Loc);
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None);
ExprResult MemberInit =
InitSeq.Perform(SemaRef, InitEntity, InitKind, None);
MemberInit = SemaRef.MaybeCreateExprWithCleanups(MemberInit);
if (MemberInit.isInvalid())
return true;
if (Indirect)
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
Indirect, Loc,
Loc,
MemberInit.get(),
Loc);
else
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
Field, Loc, Loc,
MemberInit.get(),
Loc);
return false;
}
if (!Field->getParent()->isUnion()) {
if (FieldBaseElementType->isReferenceType()) {
SemaRef.Diag(Constructor->getLocation(),
diag::err_uninitialized_member_in_ctor)
<< (int)Constructor->isImplicit()
<< SemaRef.Context.getTagDeclType(Constructor->getParent())
<< 0 << Field->getDeclName();
SemaRef.Diag(Field->getLocation(), diag::note_declared_at);
return true;
}
if (FieldBaseElementType.isConstQualified()) {
SemaRef.Diag(Constructor->getLocation(),
diag::err_uninitialized_member_in_ctor)
<< (int)Constructor->isImplicit()
<< SemaRef.Context.getTagDeclType(Constructor->getParent())
<< 1 << Field->getDeclName();
SemaRef.Diag(Field->getLocation(), diag::note_declared_at);
return true;
}
}
if (FieldBaseElementType.hasNonTrivialObjCLifetime()) {
// ARC and Weak:
// Default-initialize Objective-C pointers to NULL.
CXXMemberInit
= new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context, Field,
Loc, Loc,
new (SemaRef.Context) ImplicitValueInitExpr(Field->getType()),
Loc);
return false;
}
// Nothing to initialize.
CXXMemberInit = nullptr;
return false;
}
namespace {
struct BaseAndFieldInfo {
Sema &S;
CXXConstructorDecl *Ctor;
bool AnyErrorsInInits;
ImplicitInitializerKind IIK;
llvm::DenseMap<const void *, CXXCtorInitializer*> AllBaseFields;
SmallVector<CXXCtorInitializer*, 8> AllToInit;
llvm::DenseMap<TagDecl*, FieldDecl*> ActiveUnionMember;
BaseAndFieldInfo(Sema &S, CXXConstructorDecl *Ctor, bool ErrorsInInits)
: S(S), Ctor(Ctor), AnyErrorsInInits(ErrorsInInits) {
bool Generated = Ctor->isImplicit() || Ctor->isDefaulted();
if (Ctor->getInheritedConstructor())
IIK = IIK_Inherit;
else if (Generated && Ctor->isCopyConstructor())
IIK = IIK_Copy;
else if (Generated && Ctor->isMoveConstructor())
IIK = IIK_Move;
else
IIK = IIK_Default;
}
bool isImplicitCopyOrMove() const {
switch (IIK) {
case IIK_Copy:
case IIK_Move:
return true;
case IIK_Default:
case IIK_Inherit:
return false;
}
llvm_unreachable("Invalid ImplicitInitializerKind!");
}
bool addFieldInitializer(CXXCtorInitializer *Init) {
AllToInit.push_back(Init);
// Check whether this initializer makes the field "used".
if (Init->getInit()->HasSideEffects(S.Context))
S.UnusedPrivateFields.remove(Init->getAnyMember());
return false;
}
bool isInactiveUnionMember(FieldDecl *Field) {
RecordDecl *Record = Field->getParent();
if (!Record->isUnion())
return false;
if (FieldDecl *Active =
ActiveUnionMember.lookup(Record->getCanonicalDecl()))
return Active != Field->getCanonicalDecl();
// In an implicit copy or move constructor, ignore any in-class initializer.
if (isImplicitCopyOrMove())
return true;
// If there's no explicit initialization, the field is active only if it
// has an in-class initializer...
if (Field->hasInClassInitializer())
return false;
// ... or it's an anonymous struct or union whose class has an in-class
// initializer.
if (!Field->isAnonymousStructOrUnion())
return true;
CXXRecordDecl *FieldRD = Field->getType()->getAsCXXRecordDecl();
return !FieldRD->hasInClassInitializer();
}
/// \brief Determine whether the given field is, or is within, a union member
/// that is inactive (because there was an initializer given for a different
/// member of the union, or because the union was not initialized at all).
bool isWithinInactiveUnionMember(FieldDecl *Field,
IndirectFieldDecl *Indirect) {
if (!Indirect)
return isInactiveUnionMember(Field);
for (auto *C : Indirect->chain()) {
FieldDecl *Field = dyn_cast<FieldDecl>(C);
if (Field && isInactiveUnionMember(Field))
return true;
}
return false;
}
};
}
/// \brief Determine whether the given type is an incomplete or zero-length
/// array type.
static bool isIncompleteOrZeroLengthArrayType(ASTContext &Context, QualType T) {
if (T->isIncompleteArrayType())
return true;
while (const ConstantArrayType *ArrayT = Context.getAsConstantArrayType(T)) {
if (!ArrayT->getSize())
return true;
T = ArrayT->getElementType();
}
return false;
}
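// For example (a sketch, not from the diff), both of these field types are
// skipped by the initialization and destruction logic below:
//
//   struct F { int n; int tail[]; }; // incomplete (flexible) array member
//   struct Z { int none[0]; };       // zero-length array (GNU extension)
//
// The loop also strips outer constant dimensions, so 'int m[4][0]' counts
// as zero-length too.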
static bool CollectFieldInitializer(Sema &SemaRef, BaseAndFieldInfo &Info,
FieldDecl *Field,
IndirectFieldDecl *Indirect = nullptr) {
if (Field->isInvalidDecl())
return false;
// Overwhelmingly common case: we have a direct initializer for this field.
if (CXXCtorInitializer *Init =
Info.AllBaseFields.lookup(Field->getCanonicalDecl()))
return Info.addFieldInitializer(Init);
// C++11 [class.base.init]p8:
// if the entity is a non-static data member that has a
// brace-or-equal-initializer and either
// -- the constructor's class is a union and no other variant member of that
// union is designated by a mem-initializer-id or
// -- the constructor's class is not a union, and, if the entity is a member
// of an anonymous union, no other member of that union is designated by
// a mem-initializer-id,
// the entity is initialized as specified in [dcl.init].
//
// We also apply the same rules to handle anonymous structs within anonymous
// unions.
if (Info.isWithinInactiveUnionMember(Field, Indirect))
return false;
if (Field->hasInClassInitializer() && !Info.isImplicitCopyOrMove()) {
ExprResult DIE =
SemaRef.BuildCXXDefaultInitExpr(Info.Ctor->getLocation(), Field);
if (DIE.isInvalid())
return true;
CXXCtorInitializer *Init;
if (Indirect)
Init = new (SemaRef.Context)
CXXCtorInitializer(SemaRef.Context, Indirect, SourceLocation(),
SourceLocation(), DIE.get(), SourceLocation());
else
Init = new (SemaRef.Context)
CXXCtorInitializer(SemaRef.Context, Field, SourceLocation(),
SourceLocation(), DIE.get(), SourceLocation());
return Info.addFieldInitializer(Init);
}
// Don't initialize incomplete or zero-length arrays.
if (isIncompleteOrZeroLengthArrayType(SemaRef.Context, Field->getType()))
return false;
// Don't try to build an implicit initializer if there were semantic
// errors in any of the initializers (and therefore we might be
// missing some that the user actually wrote).
if (Info.AnyErrorsInInits)
return false;
CXXCtorInitializer *Init = nullptr;
if (BuildImplicitMemberInitializer(Info.S, Info.Ctor, Info.IIK, Field,
Indirect, Init))
return true;
if (!Init)
return false;
return Info.addFieldInitializer(Init);
}
bool
Sema::SetDelegatingInitializer(CXXConstructorDecl *Constructor,
CXXCtorInitializer *Initializer) {
assert(Initializer->isDelegatingInitializer());
Constructor->setNumCtorInitializers(1);
CXXCtorInitializer **initializer =
new (Context) CXXCtorInitializer*[1];
memcpy(initializer, &Initializer, sizeof (CXXCtorInitializer*));
Constructor->setCtorInitializers(initializer);
if (CXXDestructorDecl *Dtor = LookupDestructor(Constructor->getParent())) {
MarkFunctionReferenced(Initializer->getSourceLocation(), Dtor);
DiagnoseUseOfDecl(Dtor, Initializer->getSourceLocation());
}
DelegatingCtorDecls.push_back(Constructor);
DiagnoseUninitializedFields(*this, Constructor);
return false;
}
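// A minimal example of the delegating case handled above (illustrative):
//
//   struct D {
//     D(int);
//     D() : D(42) {} // the single delegating initializer stored by
//                    // SetDelegatingInitializer; D's destructor is also
//                    // marked referenced, since the completed object must be
//                    // destroyed if the delegating constructor's body throws
//   };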
bool Sema::SetCtorInitializers(CXXConstructorDecl *Constructor, bool AnyErrors,
ArrayRef<CXXCtorInitializer *> Initializers) {
if (Constructor->isDependentContext()) {
// Just store the initializers as written, they will be checked during
// instantiation.
if (!Initializers.empty()) {
Constructor->setNumCtorInitializers(Initializers.size());
CXXCtorInitializer **baseOrMemberInitializers =
new (Context) CXXCtorInitializer*[Initializers.size()];
memcpy(baseOrMemberInitializers, Initializers.data(),
Initializers.size() * sizeof(CXXCtorInitializer*));
Constructor->setCtorInitializers(baseOrMemberInitializers);
}
// Let template instantiation know whether we had errors.
if (AnyErrors)
Constructor->setInvalidDecl();
return false;
}
BaseAndFieldInfo Info(*this, Constructor, AnyErrors);
// We need to build the initializer AST according to the order of construction
// and not the order the user specified in the Initializers list.
CXXRecordDecl *ClassDecl = Constructor->getParent()->getDefinition();
if (!ClassDecl)
return true;
bool HadError = false;
for (unsigned i = 0; i < Initializers.size(); i++) {
CXXCtorInitializer *Member = Initializers[i];
if (Member->isBaseInitializer())
Info.AllBaseFields[Member->getBaseClass()->getAs<RecordType>()] = Member;
else {
Info.AllBaseFields[Member->getAnyMember()->getCanonicalDecl()] = Member;
if (IndirectFieldDecl *F = Member->getIndirectMember()) {
for (auto *C : F->chain()) {
FieldDecl *FD = dyn_cast<FieldDecl>(C);
if (FD && FD->getParent()->isUnion())
Info.ActiveUnionMember.insert(std::make_pair(
FD->getParent()->getCanonicalDecl(), FD->getCanonicalDecl()));
}
} else if (FieldDecl *FD = Member->getMember()) {
if (FD->getParent()->isUnion())
Info.ActiveUnionMember.insert(std::make_pair(
FD->getParent()->getCanonicalDecl(), FD->getCanonicalDecl()));
}
}
}
// Keep track of the direct virtual bases.
llvm::SmallPtrSet<CXXBaseSpecifier *, 16> DirectVBases;
for (auto &I : ClassDecl->bases()) {
if (I.isVirtual())
DirectVBases.insert(&I);
}
// Push virtual bases before others.
for (auto &VBase : ClassDecl->vbases()) {
if (CXXCtorInitializer *Value
= Info.AllBaseFields.lookup(VBase.getType()->getAs<RecordType>())) {
// [class.base.init]p7, per DR257:
// A mem-initializer where the mem-initializer-id names a virtual base
// class is ignored during execution of a constructor of any class that
// is not the most derived class.
if (ClassDecl->isAbstract()) {
// FIXME: Provide a fixit to remove the base specifier. This requires
// tracking the location of the associated comma for a base specifier.
Diag(Value->getSourceLocation(), diag::warn_abstract_vbase_init_ignored)
<< VBase.getType() << ClassDecl;
DiagnoseAbstractType(ClassDecl);
}
Info.AllToInit.push_back(Value);
} else if (!AnyErrors && !ClassDecl->isAbstract()) {
// [class.base.init]p8, per DR257:
// If a given [...] base class is not named by a mem-initializer-id
// [...] and the entity is not a virtual base class of an abstract
// class, then [...] the entity is default-initialized.
bool IsInheritedVirtualBase = !DirectVBases.count(&VBase);
CXXCtorInitializer *CXXBaseInit;
if (BuildImplicitBaseInitializer(*this, Constructor, Info.IIK,
&VBase, IsInheritedVirtualBase,
CXXBaseInit)) {
HadError = true;
continue;
}
Info.AllToInit.push_back(CXXBaseInit);
}
}
// Non-virtual bases.
for (auto &Base : ClassDecl->bases()) {
// Virtuals are in the virtual base list and already constructed.
if (Base.isVirtual())
continue;
if (CXXCtorInitializer *Value
= Info.AllBaseFields.lookup(Base.getType()->getAs<RecordType>())) {
Info.AllToInit.push_back(Value);
} else if (!AnyErrors) {
CXXCtorInitializer *CXXBaseInit;
if (BuildImplicitBaseInitializer(*this, Constructor, Info.IIK,
&Base, /*IsInheritedVirtualBase=*/false,
CXXBaseInit)) {
HadError = true;
continue;
}
Info.AllToInit.push_back(CXXBaseInit);
}
}
// Fields.
for (auto *Mem : ClassDecl->decls()) {
if (auto *F = dyn_cast<FieldDecl>(Mem)) {
// C++ [class.bit]p2:
// A declaration for a bit-field that omits the identifier declares an
// unnamed bit-field. Unnamed bit-fields are not members and cannot be
// initialized.
if (F->isUnnamedBitfield())
continue;
// If we're not generating the implicit copy/move constructor, then we'll
// handle anonymous struct/union fields based on their individual
// indirect fields.
if (F->isAnonymousStructOrUnion() && !Info.isImplicitCopyOrMove())
continue;
if (CollectFieldInitializer(*this, Info, F))
HadError = true;
continue;
}
// Beyond this point, we only consider default initialization.
if (Info.isImplicitCopyOrMove())
continue;
if (auto *F = dyn_cast<IndirectFieldDecl>(Mem)) {
if (F->getType()->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Initialize each field of an anonymous struct individually.
if (CollectFieldInitializer(*this, Info, F->getAnonField(), F))
HadError = true;
continue;
}
}
unsigned NumInitializers = Info.AllToInit.size();
if (NumInitializers > 0) {
Constructor->setNumCtorInitializers(NumInitializers);
CXXCtorInitializer **baseOrMemberInitializers =
new (Context) CXXCtorInitializer*[NumInitializers];
memcpy(baseOrMemberInitializers, Info.AllToInit.data(),
NumInitializers * sizeof(CXXCtorInitializer*));
Constructor->setCtorInitializers(baseOrMemberInitializers);
// Constructors implicitly reference the base and member
// destructors.
MarkBaseAndMemberDestructorsReferenced(Constructor->getLocation(),
Constructor->getParent());
}
return HadError;
}
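// The resulting order is construction order, not source order (sketch):
//
//   struct A {}; struct B {};
//   struct C : B, virtual A {
//     int f;
//     C() : B(), f(0) {} // stored order: A (virtual base), then B, then f,
//   };                   // even though A never appears in the list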
static void PopulateKeysForFields(FieldDecl *Field, SmallVectorImpl<const void*> &IdealInits) {
if (const RecordType *RT = Field->getType()->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->isAnonymousStructOrUnion()) {
for (auto *Field : RD->fields())
PopulateKeysForFields(Field, IdealInits);
return;
}
}
IdealInits.push_back(Field->getCanonicalDecl());
}
static const void *GetKeyForBase(ASTContext &Context, QualType BaseType) {
return Context.getCanonicalType(BaseType).getTypePtr();
}
static const void *GetKeyForMember(ASTContext &Context,
CXXCtorInitializer *Member) {
if (!Member->isAnyMemberInitializer())
return GetKeyForBase(Context, QualType(Member->getBaseClass(), 0));
return Member->getAnyMember()->getCanonicalDecl();
}
static void DiagnoseBaseOrMemInitializerOrder(
Sema &SemaRef, const CXXConstructorDecl *Constructor,
ArrayRef<CXXCtorInitializer *> Inits) {
if (Constructor->getDeclContext()->isDependentContext())
return;
// Don't check initializers order unless the warning is enabled at the
// location of at least one initializer.
bool ShouldCheckOrder = false;
for (unsigned InitIndex = 0; InitIndex != Inits.size(); ++InitIndex) {
CXXCtorInitializer *Init = Inits[InitIndex];
if (!SemaRef.Diags.isIgnored(diag::warn_initializer_out_of_order,
Init->getSourceLocation())) {
ShouldCheckOrder = true;
break;
}
}
if (!ShouldCheckOrder)
return;
// Build the list of bases and members in the order that they'll
// actually be initialized. The explicit initializers should be in
// this same order but may be missing things.
SmallVector<const void*, 32> IdealInitKeys;
const CXXRecordDecl *ClassDecl = Constructor->getParent();
// 1. Virtual bases.
for (const auto &VBase : ClassDecl->vbases())
IdealInitKeys.push_back(GetKeyForBase(SemaRef.Context, VBase.getType()));
// 2. Non-virtual bases.
for (const auto &Base : ClassDecl->bases()) {
if (Base.isVirtual())
continue;
IdealInitKeys.push_back(GetKeyForBase(SemaRef.Context, Base.getType()));
}
// 3. Direct fields.
for (auto *Field : ClassDecl->fields()) {
if (Field->isUnnamedBitfield())
continue;
PopulateKeysForFields(Field, IdealInitKeys);
}
unsigned NumIdealInits = IdealInitKeys.size();
unsigned IdealIndex = 0;
CXXCtorInitializer *PrevInit = nullptr;
for (unsigned InitIndex = 0; InitIndex != Inits.size(); ++InitIndex) {
CXXCtorInitializer *Init = Inits[InitIndex];
const void *InitKey = GetKeyForMember(SemaRef.Context, Init);
// Scan forward to try to find this initializer in the idealized
// initializers list.
for (; IdealIndex != NumIdealInits; ++IdealIndex)
if (InitKey == IdealInitKeys[IdealIndex])
break;
// If we didn't find this initializer, it must be because we
// scanned past it on a previous iteration. That can only
// happen if we're out of order; emit a warning.
if (IdealIndex == NumIdealInits && PrevInit) {
Sema::SemaDiagnosticBuilder D =
SemaRef.Diag(PrevInit->getSourceLocation(),
diag::warn_initializer_out_of_order);
if (PrevInit->isAnyMemberInitializer())
D << 0 << PrevInit->getAnyMember()->getDeclName();
else
D << 1 << PrevInit->getTypeSourceInfo()->getType();
if (Init->isAnyMemberInitializer())
D << 0 << Init->getAnyMember()->getDeclName();
else
D << 1 << Init->getTypeSourceInfo()->getType();
// Move back to the initializer's location in the ideal list.
for (IdealIndex = 0; IdealIndex != NumIdealInits; ++IdealIndex)
if (InitKey == IdealInitKeys[IdealIndex])
break;
assert(IdealIndex < NumIdealInits &&
"initializer not found in initializer list");
}
PrevInit = Init;
}
}
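// For instance (illustrative), the out-of-order warning fires on:
//
//   struct W {
//     int x, y;
//     W() : y(0), x(y) {} // -Wreorder: field 'y' will be initialized after
//   };                    // field 'x', so 'x(y)' reads an uninitialized 'y'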
namespace {
bool CheckRedundantInit(Sema &S,
CXXCtorInitializer *Init,
CXXCtorInitializer *&PrevInit) {
if (!PrevInit) {
PrevInit = Init;
return false;
}
if (FieldDecl *Field = Init->getAnyMember())
S.Diag(Init->getSourceLocation(),
diag::err_multiple_mem_initialization)
<< Field->getDeclName()
<< Init->getSourceRange();
else {
const Type *BaseClass = Init->getBaseClass();
assert(BaseClass && "neither field nor base");
S.Diag(Init->getSourceLocation(),
diag::err_multiple_base_initialization)
<< QualType(BaseClass, 0)
<< Init->getSourceRange();
}
S.Diag(PrevInit->getSourceLocation(), diag::note_previous_initializer)
<< 0 << PrevInit->getSourceRange();
return true;
}
typedef std::pair<NamedDecl *, CXXCtorInitializer *> UnionEntry;
typedef llvm::DenseMap<RecordDecl*, UnionEntry> RedundantUnionMap;
bool CheckRedundantUnionInit(Sema &S,
CXXCtorInitializer *Init,
RedundantUnionMap &Unions) {
FieldDecl *Field = Init->getAnyMember();
RecordDecl *Parent = Field->getParent();
NamedDecl *Child = Field;
while (Parent->isAnonymousStructOrUnion() || Parent->isUnion()) {
if (Parent->isUnion()) {
UnionEntry &En = Unions[Parent];
if (En.first && En.first != Child) {
S.Diag(Init->getSourceLocation(),
diag::err_multiple_mem_union_initialization)
<< Field->getDeclName()
<< Init->getSourceRange();
S.Diag(En.second->getSourceLocation(), diag::note_previous_initializer)
<< 0 << En.second->getSourceRange();
return true;
}
if (!En.first) {
En.first = Child;
En.second = Init;
}
if (!Parent->isAnonymousStructOrUnion())
return false;
}
Child = Parent;
Parent = cast<RecordDecl>(Parent->getDeclContext());
}
return false;
}
}
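// Sketches of the redundancy errors checked above (illustrative only):
//
//   struct R {
//     int a;
//     R() : a(1), a(2) {} // err_multiple_mem_initialization
//   };
//   struct V {
//     union { int i; float f; };
//     V() : i(0), f(0) {} // err_multiple_mem_union_initialization: 'i' and
//   };                    // 'f' are members of the same anonymous union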
/// ActOnMemInitializers - Handle the member initializers for a constructor.
void Sema::ActOnMemInitializers(Decl *ConstructorDecl,
SourceLocation ColonLoc,
ArrayRef<CXXCtorInitializer*> MemInits,
bool AnyErrors) {
if (!ConstructorDecl)
return;
AdjustDeclIfTemplate(ConstructorDecl);
CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(ConstructorDecl);
if (!Constructor) {
Diag(ColonLoc, diag::err_only_constructors_take_base_inits);
return;
}
// Mapping for the duplicate initializers check.
// For member initializers, this is keyed with a FieldDecl*.
// For base initializers, this is keyed with a Type*.
llvm::DenseMap<const void *, CXXCtorInitializer *> Members;
// Mapping for the inconsistent anonymous-union initializers check.
RedundantUnionMap MemberUnions;
bool HadError = false;
for (unsigned i = 0; i < MemInits.size(); i++) {
CXXCtorInitializer *Init = MemInits[i];
// Set the source order index.
Init->setSourceOrder(i);
if (Init->isAnyMemberInitializer()) {
const void *Key = GetKeyForMember(Context, Init);
if (CheckRedundantInit(*this, Init, Members[Key]) ||
CheckRedundantUnionInit(*this, Init, MemberUnions))
HadError = true;
} else if (Init->isBaseInitializer()) {
const void *Key = GetKeyForMember(Context, Init);
if (CheckRedundantInit(*this, Init, Members[Key]))
HadError = true;
} else {
assert(Init->isDelegatingInitializer());
// This must be the only initializer
if (MemInits.size() != 1) {
Diag(Init->getSourceLocation(),
diag::err_delegating_initializer_alone)
<< Init->getSourceRange() << MemInits[i ? 0 : 1]->getSourceRange();
// We will treat this as being the only initializer.
}
SetDelegatingInitializer(Constructor, MemInits[i]);
// Return immediately as the initializer is set.
return;
}
}
if (HadError)
return;
DiagnoseBaseOrMemInitializerOrder(*this, Constructor, MemInits);
SetCtorInitializers(Constructor, AnyErrors, MemInits);
DiagnoseUninitializedFields(*this, Constructor);
}
void
Sema::MarkBaseAndMemberDestructorsReferenced(SourceLocation Location,
CXXRecordDecl *ClassDecl) {
// Ignore dependent contexts. Also ignore unions, since their members never
// have destructors implicitly called.
if (ClassDecl->isDependentContext() || ClassDecl->isUnion())
return;
// FIXME: all the access-control diagnostics are positioned on the
// field/base declaration. That's probably good; that said, the
// user might reasonably want to know why the destructor is being
// emitted, and we currently don't say.
// Non-static data members.
for (auto *Field : ClassDecl->fields()) {
if (Field->isInvalidDecl())
continue;
// Don't destroy incomplete or zero-length arrays.
if (isIncompleteOrZeroLengthArrayType(Context, Field->getType()))
continue;
QualType FieldType = Context.getBaseElementType(Field->getType());
const RecordType* RT = FieldType->getAs<RecordType>();
if (!RT)
continue;
CXXRecordDecl *FieldClassDecl = cast<CXXRecordDecl>(RT->getDecl());
if (FieldClassDecl->isInvalidDecl())
continue;
if (FieldClassDecl->hasIrrelevantDestructor())
continue;
// The destructor for an implicit anonymous union member is never invoked.
if (FieldClassDecl->isUnion() && FieldClassDecl->isAnonymousStructOrUnion())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(FieldClassDecl);
assert(Dtor && "No dtor found for FieldClassDecl!");
CheckDestructorAccess(Field->getLocation(), Dtor,
PDiag(diag::err_access_dtor_field)
<< Field->getDeclName()
<< FieldType);
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
// We only potentially invoke the destructors of potentially constructed
// subobjects.
bool VisitVirtualBases = !ClassDecl->isAbstract();
llvm::SmallPtrSet<const RecordType *, 8> DirectVirtualBases;
// Bases.
for (const auto &Base : ClassDecl->bases()) {
// Bases are always records in a well-formed non-dependent class.
const RecordType *RT = Base.getType()->getAs<RecordType>();
// Remember direct virtual bases.
if (Base.isVirtual()) {
if (!VisitVirtualBases)
continue;
DirectVirtualBases.insert(RT);
}
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(RT->getDecl());
// If our base class is invalid, we probably can't get its dtor anyway.
if (BaseClassDecl->isInvalidDecl())
continue;
if (BaseClassDecl->hasIrrelevantDestructor())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(BaseClassDecl);
assert(Dtor && "No dtor found for BaseClassDecl!");
// FIXME: caret should be on the start of the class name
CheckDestructorAccess(Base.getLocStart(), Dtor,
PDiag(diag::err_access_dtor_base)
<< Base.getType()
<< Base.getSourceRange(),
Context.getTypeDeclType(ClassDecl));
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
if (!VisitVirtualBases)
return;
// Virtual bases.
for (const auto &VBase : ClassDecl->vbases()) {
// Bases are always records in a well-formed non-dependent class.
const RecordType *RT = VBase.getType()->castAs<RecordType>();
// Ignore direct virtual bases.
if (DirectVirtualBases.count(RT))
continue;
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(RT->getDecl());
// If our base class is invalid, we probably can't get its dtor anyway.
if (BaseClassDecl->isInvalidDecl())
continue;
if (BaseClassDecl->hasIrrelevantDestructor())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(BaseClassDecl);
assert(Dtor && "No dtor found for BaseClassDecl!");
if (CheckDestructorAccess(
ClassDecl->getLocation(), Dtor,
PDiag(diag::err_access_dtor_vbase)
<< Context.getTypeDeclType(ClassDecl) << VBase.getType(),
Context.getTypeDeclType(ClassDecl)) ==
AR_accessible) {
CheckDerivedToBaseConversion(
Context.getTypeDeclType(ClassDecl), VBase.getType(),
diag::err_access_dtor_vbase, 0, ClassDecl->getLocation(),
SourceRange(), DeclarationName(), nullptr);
}
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
}
void Sema::ActOnDefaultCtorInitializers(Decl *CDtorDecl) {
if (!CDtorDecl)
return;
if (CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(CDtorDecl)) {
SetCtorInitializers(Constructor, /*AnyErrors=*/false);
DiagnoseUninitializedFields(*this, Constructor);
}
}
bool Sema::isAbstractType(SourceLocation Loc, QualType T) {
if (!getLangOpts().CPlusPlus)
return false;
const auto *RD = Context.getBaseElementType(T)->getAsCXXRecordDecl();
if (!RD)
return false;
// FIXME: Per [temp.inst]p1, we are supposed to trigger instantiation of a
// class template specialization here, but doing so breaks a lot of code.
// We can't answer whether something is abstract until it has a
// definition. If it's currently being defined, we'll walk back
// over all the declarations when we have a full definition.
const CXXRecordDecl *Def = RD->getDefinition();
if (!Def || Def->isBeingDefined())
return false;
return RD->isAbstract();
}
bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
TypeDiagnoser &Diagnoser) {
if (!isAbstractType(Loc, T))
return false;
T = Context.getBaseElementType(T);
Diagnoser.diagnose(*this, Loc, T);
DiagnoseAbstractType(T->getAsCXXRecordDecl());
return true;
}
void Sema::DiagnoseAbstractType(const CXXRecordDecl *RD) {
// Check if we've already emitted the list of pure virtual functions
// for this class.
if (PureVirtualClassDiagSet && PureVirtualClassDiagSet->count(RD))
return;
// If the diagnostic is suppressed, don't emit the notes. We're only
// going to emit them once, so try to attach them to a diagnostic we're
// actually going to show.
if (Diags.isLastDiagnosticIgnored())
return;
CXXFinalOverriderMap FinalOverriders;
RD->getFinalOverriders(FinalOverriders);
// Keep a set of seen pure methods so we won't diagnose the same method
// more than once.
llvm::SmallPtrSet<const CXXMethodDecl *, 8> SeenPureMethods;
for (CXXFinalOverriderMap::iterator M = FinalOverriders.begin(),
MEnd = FinalOverriders.end();
M != MEnd;
++M) {
for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd; ++SO) {
// C++ [class.abstract]p4:
// A class is abstract if it contains or inherits at least one
// pure virtual function for which the final overrider is pure
// virtual.
//
if (SO->second.size() != 1)
continue;
if (!SO->second.front().Method->isPure())
continue;
if (!SeenPureMethods.insert(SO->second.front().Method).second)
continue;
Diag(SO->second.front().Method->getLocation(),
diag::note_pure_virtual_function)
<< SO->second.front().Method->getDeclName() << RD->getDeclName();
}
}
if (!PureVirtualClassDiagSet)
PureVirtualClassDiagSet.reset(new RecordDeclSetTy);
PureVirtualClassDiagSet->insert(RD);
}
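// For example (a sketch), the note emitted here points at each pure virtual
// function that keeps the class abstract:
//
//   struct Shape {
//     virtual void draw() = 0; // note: unimplemented pure virtual method
//   };                         // 'draw' in 'Shape'
//   Shape s; // error: variable type 'Shape' is an abstract class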
namespace {
struct AbstractUsageInfo {
Sema &S;
CXXRecordDecl *Record;
CanQualType AbstractType;
bool Invalid;
AbstractUsageInfo(Sema &S, CXXRecordDecl *Record)
: S(S), Record(Record),
AbstractType(S.Context.getCanonicalType(
S.Context.getTypeDeclType(Record))),
Invalid(false) {}
void DiagnoseAbstractType() {
if (Invalid) return;
S.DiagnoseAbstractType(Record);
Invalid = true;
}
void CheckType(const NamedDecl *D, TypeLoc TL, Sema::AbstractDiagSelID Sel);
};
struct CheckAbstractUsage {
AbstractUsageInfo &Info;
const NamedDecl *Ctx;
CheckAbstractUsage(AbstractUsageInfo &Info, const NamedDecl *Ctx)
: Info(Info), Ctx(Ctx) {}
void Visit(TypeLoc TL, Sema::AbstractDiagSelID Sel) {
switch (TL.getTypeLocClass()) {
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
case TypeLoc::CLASS: Check(TL.castAs<CLASS##TypeLoc>(), Sel); break;
#include "clang/AST/TypeLocNodes.def"
}
}
void Check(FunctionProtoTypeLoc TL, Sema::AbstractDiagSelID Sel) {
Visit(TL.getReturnLoc(), Sema::AbstractReturnType);
for (unsigned I = 0, E = TL.getNumParams(); I != E; ++I) {
if (!TL.getParam(I))
continue;
TypeSourceInfo *TSI = TL.getParam(I)->getTypeSourceInfo();
if (TSI) Visit(TSI->getTypeLoc(), Sema::AbstractParamType);
}
}
void Check(ArrayTypeLoc TL, Sema::AbstractDiagSelID Sel) {
Visit(TL.getElementLoc(), Sema::AbstractArrayType);
}
void Check(TemplateSpecializationTypeLoc TL, Sema::AbstractDiagSelID Sel) {
// Visit the type parameters from a permissive context.
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) {
TemplateArgumentLoc TAL = TL.getArgLoc(I);
if (TAL.getArgument().getKind() == TemplateArgument::Type)
if (TypeSourceInfo *TSI = TAL.getTypeSourceInfo())
Visit(TSI->getTypeLoc(), Sema::AbstractNone);
// TODO: other template argument types?
}
}
// Visit pointee types from a permissive context.
#define CheckPolymorphic(Type) \
void Check(Type TL, Sema::AbstractDiagSelID Sel) { \
Visit(TL.getNextTypeLoc(), Sema::AbstractNone); \
}
CheckPolymorphic(PointerTypeLoc)
CheckPolymorphic(ReferenceTypeLoc)
CheckPolymorphic(MemberPointerTypeLoc)
CheckPolymorphic(BlockPointerTypeLoc)
CheckPolymorphic(AtomicTypeLoc)
/// Handle all the types we haven't given a more specific
/// implementation for above.
void Check(TypeLoc TL, Sema::AbstractDiagSelID Sel) {
// Every other kind of type that we haven't called out already
// that has an inner type is either (1) sugar or (2) contains that
// inner type in some way as a subobject.
if (TypeLoc Next = TL.getNextTypeLoc())
return Visit(Next, Sel);
// If there's no inner type and we're in a permissive context,
// don't diagnose.
if (Sel == Sema::AbstractNone) return;
// Check whether the type matches the abstract type.
QualType T = TL.getType();
if (T->isArrayType()) {
Sel = Sema::AbstractArrayType;
T = Info.S.Context.getBaseElementType(T);
}
CanQualType CT = T->getCanonicalTypeUnqualified().getUnqualifiedType();
if (CT != Info.AbstractType) return;
// It matched; do some magic.
if (Sel == Sema::AbstractArrayType) {
Info.S.Diag(Ctx->getLocation(), diag::err_array_of_abstract_type)
<< T << TL.getSourceRange();
} else {
Info.S.Diag(Ctx->getLocation(), diag::err_abstract_type_in_decl)
<< Sel << T << TL.getSourceRange();
}
Info.DiagnoseAbstractType();
}
};
void AbstractUsageInfo::CheckType(const NamedDecl *D, TypeLoc TL,
Sema::AbstractDiagSelID Sel) {
CheckAbstractUsage(*this, D).Visit(TL, Sel);
}
}
/// Check for invalid uses of an abstract type in a method declaration.
static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
CXXMethodDecl *MD) {
// No need to do the check on definitions, which require that
// the return/param types be complete.
if (MD->doesThisDeclarationHaveABody())
return;
// For safety's sake, just ignore it if we don't have type source
// information. This should never happen for non-implicit methods,
// but...
if (TypeSourceInfo *TSI = MD->getTypeSourceInfo())
Info.CheckType(MD, TSI->getTypeLoc(), Sema::AbstractNone);
}
/// Check for invalid uses of an abstract type within a class definition.
static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
CXXRecordDecl *RD) {
for (auto *D : RD->decls()) {
if (D->isImplicit()) continue;
// Methods and method templates.
if (isa<CXXMethodDecl>(D)) {
CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(D));
} else if (isa<FunctionTemplateDecl>(D)) {
FunctionDecl *FD = cast<FunctionTemplateDecl>(D)->getTemplatedDecl();
CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(FD));
// Fields and static variables.
} else if (isa<FieldDecl>(D)) {
FieldDecl *FD = cast<FieldDecl>(D);
if (TypeSourceInfo *TSI = FD->getTypeSourceInfo())
Info.CheckType(FD, TSI->getTypeLoc(), Sema::AbstractFieldType);
} else if (isa<VarDecl>(D)) {
VarDecl *VD = cast<VarDecl>(D);
if (TypeSourceInfo *TSI = VD->getTypeSourceInfo())
Info.CheckType(VD, TSI->getTypeLoc(), Sema::AbstractVariableType);
// Nested classes and class templates.
} else if (isa<CXXRecordDecl>(D)) {
CheckAbstractClassUsage(Info, cast<CXXRecordDecl>(D));
} else if (isa<ClassTemplateDecl>(D)) {
CheckAbstractClassUsage(Info,
cast<ClassTemplateDecl>(D)->getTemplatedDecl());
}
}
}
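// Illustrative uses rejected by the checks above:
//
//   struct Abstract { virtual void f() = 0; };
//   struct User {
//     Abstract field;      // error: abstract class as a field type
//     Abstract arr[3];     // error: array of abstract class type
//     Abstract get();      // error: abstract class as a return type
//     void put(Abstract);  // error: abstract class as a parameter type
//   };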
static void ReferenceDllExportedMethods(Sema &S, CXXRecordDecl *Class) {
Attr *ClassAttr = getDLLAttr(Class);
if (!ClassAttr)
return;
assert(ClassAttr->getKind() == attr::DLLExport);
TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
if (TSK == TSK_ExplicitInstantiationDeclaration)
// Don't go any further if this is just an explicit instantiation
// declaration.
return;
for (Decl *Member : Class->decls()) {
auto *MD = dyn_cast<CXXMethodDecl>(Member);
if (!MD)
continue;
if (Member->getAttr<DLLExportAttr>()) {
if (MD->isUserProvided()) {
// Instantiate non-default class member functions ...
// ... except for certain kinds of template specializations.
if (TSK == TSK_ImplicitInstantiation && !ClassAttr->isInherited())
continue;
S.MarkFunctionReferenced(Class->getLocation(), MD);
// The function will be passed to the consumer when its definition is
// encountered.
} else if (!MD->isTrivial() || MD->isExplicitlyDefaulted() ||
MD->isCopyAssignmentOperator() ||
MD->isMoveAssignmentOperator()) {
// Synthesize and instantiate non-trivial implicit methods, explicitly
// defaulted methods, and the copy and move assignment operators. The
// latter are exported even if they are trivial, because the address of
// an operator can be taken and should compare equal across libraries.
DiagnosticErrorTrap Trap(S.Diags);
S.MarkFunctionReferenced(Class->getLocation(), MD);
if (Trap.hasErrorOccurred()) {
S.Diag(ClassAttr->getLocation(), diag::note_due_to_dllexported_class)
<< Class->getName() << !S.getLangOpts().CPlusPlus11;
break;
}
// There is no later point when we will see the definition of this
// function, so pass it to the consumer now.
S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
}
}
}
}
static void checkForMultipleExportedDefaultConstructors(Sema &S,
CXXRecordDecl *Class) {
// Only the MS ABI has default constructor closures, so we don't need to do
// this semantic checking anywhere else.
if (!S.Context.getTargetInfo().getCXXABI().isMicrosoft())
return;
CXXConstructorDecl *LastExportedDefaultCtor = nullptr;
for (Decl *Member : Class->decls()) {
// Look for exported default constructors.
auto *CD = dyn_cast<CXXConstructorDecl>(Member);
if (!CD || !CD->isDefaultConstructor())
continue;
auto *Attr = CD->getAttr<DLLExportAttr>();
if (!Attr)
continue;
// If the class is non-dependent, mark the default arguments as ODR-used so
// that we can properly codegen the constructor closure.
if (!Class->isDependentContext()) {
for (ParmVarDecl *PD : CD->parameters()) {
(void)S.CheckCXXDefaultArgExpr(Attr->getLocation(), CD, PD);
S.DiscardCleanupsInEvaluationContext();
}
}
if (LastExportedDefaultCtor) {
S.Diag(LastExportedDefaultCtor->getLocation(),
diag::err_attribute_dll_ambiguous_default_ctor)
<< Class;
S.Diag(CD->getLocation(), diag::note_entity_declared_at)
<< CD->getDeclName();
return;
}
LastExportedDefaultCtor = CD;
}
}
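// A sketch of the MS ABI ambiguity diagnosed above: with default arguments,
// two exported constructors can both serve as the default constructor
// closure's target:
//
//   struct S {
//     __declspec(dllexport) S(int x = 0);
//     __declspec(dllexport) S(float y = 0); // both are callable with no
//   };                                      // arguments, so the default ctor
//                                           // closure target is ambiguous
//                            // (err_attribute_dll_ambiguous_default_ctor)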
/// \brief Check class-level dllimport/dllexport attribute.
void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) {
Attr *ClassAttr = getDLLAttr(Class);
// MSVC propagates DLL attributes to partial class template specializations.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() && !ClassAttr) {
if (auto *Spec = dyn_cast<ClassTemplatePartialSpecializationDecl>(Class)) {
if (Attr *TemplateAttr =
getDLLAttr(Spec->getSpecializedTemplate()->getTemplatedDecl())) {
auto *A = cast<InheritableAttr>(TemplateAttr->clone(getASTContext()));
A->setInherited(true);
ClassAttr = A;
}
}
}
if (!ClassAttr)
return;
if (!Class->isExternallyVisible()) {
Diag(Class->getLocation(), diag::err_attribute_dll_not_extern)
<< Class << ClassAttr;
return;
}
if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!ClassAttr->isInherited()) {
// Diagnose dll attributes on members of class with dll attribute.
for (Decl *Member : Class->decls()) {
if (!isa<VarDecl>(Member) && !isa<CXXMethodDecl>(Member))
continue;
InheritableAttr *MemberAttr = getDLLAttr(Member);
if (!MemberAttr || MemberAttr->isInherited() || Member->isInvalidDecl())
continue;
Diag(MemberAttr->getLocation(),
diag::err_attribute_dll_member_of_dll_class)
<< MemberAttr << ClassAttr;
Diag(ClassAttr->getLocation(), diag::note_previous_attribute);
Member->setInvalidDecl();
}
}
if (Class->getDescribedClassTemplate())
// Don't inherit dll attribute until the template is instantiated.
return;
// The class is either imported or exported.
const bool ClassExported = ClassAttr->getKind() == attr::DLLExport;
TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
// Ignore explicit dllexport on explicit class template instantiation declarations.
if (ClassExported && !ClassAttr->isInherited() &&
TSK == TSK_ExplicitInstantiationDeclaration) {
Class->dropAttr<DLLExportAttr>();
return;
}
// Force declaration of implicit members so they can inherit the attribute.
ForceDeclarationOfImplicitMembers(Class);
// FIXME: MSVC's docs say all bases must be exportable, but this doesn't
// seem to be true in practice?
for (Decl *Member : Class->decls()) {
VarDecl *VD = dyn_cast<VarDecl>(Member);
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Member);
// Only methods and static fields inherit the attributes.
if (!VD && !MD)
continue;
if (MD) {
// Don't process deleted methods.
if (MD->isDeleted())
continue;
if (MD->isInlined()) {
// MinGW does not import or export inline methods.
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!Context.getTargetInfo().getTriple().isWindowsItaniumEnvironment())
continue;
// MSVC versions before 2015 don't export the move assignment operators
// and move constructor, so don't attempt to import/export them if
// we have a definition.
auto *Ctor = dyn_cast<CXXConstructorDecl>(MD);
if ((MD->isMoveAssignmentOperator() ||
(Ctor && Ctor->isMoveConstructor())) &&
!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
continue;
// MSVC 2015 doesn't export a trivial defaulted ctor or dtor, but the copy
// assignment operator is exported anyway.
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
(Ctor || isa<CXXDestructorDecl>(MD)) && MD->isTrivial())
continue;
}
}
if (!cast<NamedDecl>(Member)->isExternallyVisible())
continue;
if (!getDLLAttr(Member)) {
auto *NewAttr =
cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
NewAttr->setInherited(true);
Member->addAttr(NewAttr);
}
}
if (ClassExported)
DelayedDllExportClasses.push_back(Class);
}
/// \brief Perform propagation of DLL attributes from a derived class to a
/// templated base class for MS compatibility.
void Sema::propagateDLLAttrToBaseClassTemplate(
CXXRecordDecl *Class, Attr *ClassAttr,
ClassTemplateSpecializationDecl *BaseTemplateSpec, SourceLocation BaseLoc) {
if (getDLLAttr(
BaseTemplateSpec->getSpecializedTemplate()->getTemplatedDecl())) {
// If the base class template has a DLL attribute, don't try to change it.
return;
}
auto TSK = BaseTemplateSpec->getSpecializationKind();
if (!getDLLAttr(BaseTemplateSpec) &&
(TSK == TSK_Undeclared || TSK == TSK_ExplicitInstantiationDeclaration ||
TSK == TSK_ImplicitInstantiation)) {
// The template hasn't been instantiated yet (or it has, but only as an
// explicit instantiation declaration or implicit instantiation, which means
// we haven't codegenned any members yet), so propagate the attribute.
auto *NewAttr = cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
NewAttr->setInherited(true);
BaseTemplateSpec->addAttr(NewAttr);
// If the template is already instantiated, checkDLLAttributeRedeclaration()
// needs to be run again so that it sees the new attribute. Otherwise this
// will get run whenever the template is instantiated.
if (TSK != TSK_Undeclared)
checkClassLevelDLLAttribute(BaseTemplateSpec);
return;
}
if (getDLLAttr(BaseTemplateSpec)) {
// The template has already been specialized or instantiated with an
// attribute, explicitly or through propagation. We should not try to change
// it.
return;
}
// The template was previously instantiated or explicitly specialized without
// a dll attribute. It's too late for us to add an attribute, so warn that
// this is unsupported.
Diag(BaseLoc, diag::warn_attribute_dll_instantiated_base_class)
<< BaseTemplateSpec->isExplicitSpecialization();
Diag(ClassAttr->getLocation(), diag::note_attribute);
if (BaseTemplateSpec->isExplicitSpecialization()) {
Diag(BaseTemplateSpec->getLocation(),
diag::note_template_class_explicit_specialization_was_here)
<< BaseTemplateSpec;
} else {
Diag(BaseTemplateSpec->getPointOfInstantiation(),
diag::note_template_class_instantiation_was_here)
<< BaseTemplateSpec;
}
}
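// Roughly (an illustrative sketch, MS-compatibility mode):
//
//   template <typename T> struct Base {};
//   struct __declspec(dllexport) Derived : Base<int> {};
//
// If Base<int> carries no dll attribute and no members have been codegenned
// yet, the dllexport attribute is propagated to it; if it was already
// explicitly specialized or explicitly instantiated, only a warning is
// emitted.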
static void DefineImplicitSpecialMember(Sema &S, CXXMethodDecl *MD,
SourceLocation DefaultLoc) {
switch (S.getSpecialMember(MD)) {
case Sema::CXXDefaultConstructor:
S.DefineImplicitDefaultConstructor(DefaultLoc,
cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXCopyConstructor:
S.DefineImplicitCopyConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXCopyAssignment:
S.DefineImplicitCopyAssignment(DefaultLoc, MD);
break;
case Sema::CXXDestructor:
S.DefineImplicitDestructor(DefaultLoc, cast<CXXDestructorDecl>(MD));
break;
case Sema::CXXMoveConstructor:
S.DefineImplicitMoveConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXMoveAssignment:
S.DefineImplicitMoveAssignment(DefaultLoc, MD);
break;
case Sema::CXXInvalid:
llvm_unreachable("Invalid special member.");
}
}
+/// Determine whether a type is permitted to be passed or returned in
+/// registers, per C++ [class.temporary]p3.
+static bool computeCanPassInRegisters(Sema &S, CXXRecordDecl *D) {
+ if (D->isDependentType() || D->isInvalidDecl())
+ return false;
+
+ // Per C++ [class.temporary]p3, the relevant condition is:
+ // each copy constructor, move constructor, and destructor of X is
+ // either trivial or deleted, and X has at least one non-deleted copy
+ // or move constructor
+ bool HasNonDeletedCopyOrMove = false;
+
+ if (D->needsImplicitCopyConstructor() &&
+ !D->defaultedCopyConstructorIsDeleted()) {
+ if (!D->hasTrivialCopyConstructor())
+ return false;
+ HasNonDeletedCopyOrMove = true;
+ }
+
+ if (S.getLangOpts().CPlusPlus11 && D->needsImplicitMoveConstructor() &&
+ !D->defaultedMoveConstructorIsDeleted()) {
+ if (!D->hasTrivialMoveConstructor())
+ return false;
+ HasNonDeletedCopyOrMove = true;
+ }
+
+ if (D->needsImplicitDestructor() && !D->defaultedDestructorIsDeleted() &&
+ !D->hasTrivialDestructor())
+ return false;
+
+ for (const CXXMethodDecl *MD : D->methods()) {
+ if (MD->isDeleted())
+ continue;
+
+ auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+ if (CD && CD->isCopyOrMoveConstructor())
+ HasNonDeletedCopyOrMove = true;
+ else if (!isa<CXXDestructorDecl>(MD))
+ continue;
+
+ if (!MD->isTrivial())
+ return false;
+ }
+
+ return HasNonDeletedCopyOrMove;
+}
+
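// A rough sketch of how the new rule applies (illustrative only):
//
//   struct InRegs  { int x; };                // trivial copy/move/dtor: yes
//   struct ByRef   { ByRef(const ByRef &); }; // non-trivial copy ctor: no
//   struct NoCopy {
//     NoCopy(const NoCopy &) = delete;        // no non-deleted copy or move
//     NoCopy(NoCopy &&) = delete;             // constructor: no
//   };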
/// \brief Perform semantic checks on a class definition that has been
/// completing, introducing implicitly-declared members, checking for
/// abstract types, etc.
void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
if (!Record)
return;
if (Record->isAbstract() && !Record->isInvalidDecl()) {
AbstractUsageInfo Info(*this, Record);
CheckAbstractClassUsage(Info, Record);
}
// If this is not an aggregate type and has no user-declared constructor,
// complain about any non-static data members of reference or const scalar
// type, since they will never get initializers.
if (!Record->isInvalidDecl() && !Record->isDependentType() &&
!Record->isAggregate() && !Record->hasUserDeclaredConstructor() &&
!Record->isLambda()) {
bool Complained = false;
for (const auto *F : Record->fields()) {
if (F->hasInClassInitializer() || F->isUnnamedBitfield())
continue;
if (F->getType()->isReferenceType() ||
(F->getType().isConstQualified() && F->getType()->isScalarType())) {
if (!Complained) {
Diag(Record->getLocation(), diag::warn_no_constructor_for_refconst)
<< Record->getTagKind() << Record;
Complained = true;
}
Diag(F->getLocation(), diag::note_refconst_member_not_initialized)
<< F->getType()->isReferenceType()
<< F->getDeclName();
}
}
}
if (Record->getIdentifier()) {
// C++ [class.mem]p13:
// If T is the name of a class, then each of the following shall have a
// name different from T:
// - every member of every anonymous union that is a member of class T.
//
// C++ [class.mem]p14:
// In addition, if class T has a user-declared constructor (12.1), every
// non-static data member of class T shall have a name different from T.
DeclContext::lookup_result R = Record->lookup(Record->getDeclName());
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E;
++I) {
NamedDecl *D = *I;
if ((isa<FieldDecl>(D) && Record->hasUserDeclaredConstructor()) ||
isa<IndirectFieldDecl>(D)) {
Diag(D->getLocation(), diag::err_member_name_of_class)
<< D->getDeclName();
break;
}
}
}
// Warn if the class has virtual methods but non-virtual public destructor.
if (Record->isPolymorphic() && !Record->isDependentType()) {
CXXDestructorDecl *dtor = Record->getDestructor();
if ((!dtor || (!dtor->isVirtual() && dtor->getAccess() == AS_public)) &&
!Record->hasAttr<FinalAttr>())
Diag(dtor ? dtor->getLocation() : Record->getLocation(),
diag::warn_non_virtual_dtor) << Context.getRecordType(Record);
}
if (Record->isAbstract()) {
if (FinalAttr *FA = Record->getAttr<FinalAttr>()) {
Diag(Record->getLocation(), diag::warn_abstract_final_class)
<< FA->isSpelledAsSealed();
DiagnoseAbstractType(Record);
}
}
bool HasMethodWithOverrideControl = false,
HasOverridingMethodWithoutOverrideControl = false;
if (!Record->isDependentType()) {
for (auto *M : Record->methods()) {
// See if a method overloads virtual methods in a base
// class without overriding any.
if (!M->isStatic())
DiagnoseHiddenVirtualMethods(M);
if (M->hasAttr<OverrideAttr>())
HasMethodWithOverrideControl = true;
else if (M->size_overridden_methods() > 0)
HasOverridingMethodWithoutOverrideControl = true;
// Check whether the explicitly-defaulted special members are valid.
if (!M->isInvalidDecl() && M->isExplicitlyDefaulted())
CheckExplicitlyDefaultedSpecialMember(M);
// For an explicitly defaulted or deleted special member, we defer
// determining triviality until the class is complete. That time is now!
CXXSpecialMember CSM = getSpecialMember(M);
if (!M->isImplicit() && !M->isUserProvided()) {
if (CSM != CXXInvalid) {
M->setTrivial(SpecialMemberIsTrivial(M, CSM));
// Inform the class that we've finished declaring this member.
Record->finishedDefaultedOrDeletedMember(M);
}
}
if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
M->hasAttr<DLLExportAttr>()) {
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
M->isTrivial() &&
(CSM == CXXDefaultConstructor || CSM == CXXCopyConstructor ||
CSM == CXXDestructor))
M->dropAttr<DLLExportAttr>();
if (M->hasAttr<DLLExportAttr>()) {
DefineImplicitSpecialMember(*this, M, M->getLocation());
ActOnFinishInlineFunctionDef(M);
}
}
}
}
if (HasMethodWithOverrideControl &&
HasOverridingMethodWithoutOverrideControl) {
// At least one method has the 'override' control declared.
// Diagnose all other overridden methods which do not have 'override' specified on them.
for (auto *M : Record->methods())
DiagnoseAbsenceOfOverrideControl(M);
}
// ms_struct is a request to use the same ABI rules as MSVC. Check
// whether this class uses any C++ features that are implemented
// completely differently in MSVC, and if so, emit a diagnostic.
// That diagnostic defaults to an error, but we allow projects to
// map it down to a warning (or ignore it). It's a fairly common
// practice among users of the ms_struct pragma to mass-annotate
// headers, sweeping up a bunch of types that the project doesn't
// really rely on MSVC-compatible layout for. We must therefore
// support "ms_struct except for C++ stuff" as a secondary ABI.
if (Record->isMsStruct(Context) &&
(Record->isPolymorphic() || Record->getNumBases())) {
Diag(Record->getLocation(), diag::warn_cxx_ms_struct);
}
checkClassLevelDLLAttribute(Record);
+
+ Record->setCanPassInRegisters(computeCanPassInRegisters(*this, Record));
}
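// For example (illustrative), the non-virtual-destructor warning above:
//
//   struct Poly {
//     virtual void f();
//     ~Poly(); // -Wnon-virtual-dtor: 'Poly' has virtual functions but a
//   };         // public non-virtual destructor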
/// Look up the special member function that would be called by a special
/// member function for a subobject of class type.
///
/// \param Class The class type of the subobject.
/// \param CSM The kind of special member function.
/// \param FieldQuals If the subobject is a field, its cv-qualifiers.
/// \param ConstRHS True if this is a copy operation with a const object
/// on its RHS, that is, if the argument to the outer special member
/// function is 'const' and this is not a field marked 'mutable'.
static Sema::SpecialMemberOverloadResult lookupCallFromSpecialMember(
Sema &S, CXXRecordDecl *Class, Sema::CXXSpecialMember CSM,
unsigned FieldQuals, bool ConstRHS) {
unsigned LHSQuals = 0;
if (CSM == Sema::CXXCopyAssignment || CSM == Sema::CXXMoveAssignment)
LHSQuals = FieldQuals;
unsigned RHSQuals = FieldQuals;
if (CSM == Sema::CXXDefaultConstructor || CSM == Sema::CXXDestructor)
RHSQuals = 0;
else if (ConstRHS)
RHSQuals |= Qualifiers::Const;
return S.LookupSpecialMember(Class, CSM,
RHSQuals & Qualifiers::Const,
RHSQuals & Qualifiers::Volatile,
false,
LHSQuals & Qualifiers::Const,
LHSQuals & Qualifiers::Volatile);
}
class Sema::InheritedConstructorInfo {
Sema &S;
SourceLocation UseLoc;
/// A mapping from the base classes through which the constructor was
/// inherited to the using shadow declaration in that base class (or a null
/// pointer if the constructor was declared in that base class).
llvm::DenseMap<CXXRecordDecl *, ConstructorUsingShadowDecl *>
InheritedFromBases;
public:
InheritedConstructorInfo(Sema &S, SourceLocation UseLoc,
ConstructorUsingShadowDecl *Shadow)
: S(S), UseLoc(UseLoc) {
bool DiagnosedMultipleConstructedBases = false;
CXXRecordDecl *ConstructedBase = nullptr;
UsingDecl *ConstructedBaseUsing = nullptr;
// Find the set of such base class subobjects and check that there's a
// unique constructed subobject.
for (auto *D : Shadow->redecls()) {
auto *DShadow = cast<ConstructorUsingShadowDecl>(D);
auto *DNominatedBase = DShadow->getNominatedBaseClass();
auto *DConstructedBase = DShadow->getConstructedBaseClass();
InheritedFromBases.insert(
std::make_pair(DNominatedBase->getCanonicalDecl(),
DShadow->getNominatedBaseClassShadowDecl()));
if (DShadow->constructsVirtualBase())
InheritedFromBases.insert(
std::make_pair(DConstructedBase->getCanonicalDecl(),
DShadow->getConstructedBaseClassShadowDecl()));
else
assert(DNominatedBase == DConstructedBase);
// [class.inhctor.init]p2:
// If the constructor was inherited from multiple base class subobjects
// of type B, the program is ill-formed.
if (!ConstructedBase) {
ConstructedBase = DConstructedBase;
ConstructedBaseUsing = D->getUsingDecl();
} else if (ConstructedBase != DConstructedBase &&
!Shadow->isInvalidDecl()) {
if (!DiagnosedMultipleConstructedBases) {
S.Diag(UseLoc, diag::err_ambiguous_inherited_constructor)
<< Shadow->getTargetDecl();
S.Diag(ConstructedBaseUsing->getLocation(),
diag::note_ambiguous_inherited_constructor_using)
<< ConstructedBase;
DiagnosedMultipleConstructedBases = true;
}
S.Diag(D->getUsingDecl()->getLocation(),
diag::note_ambiguous_inherited_constructor_using)
<< DConstructedBase;
}
}
if (DiagnosedMultipleConstructedBases)
Shadow->setInvalidDecl();
}
/// Find the constructor to use for inherited construction of a base class,
/// and whether that base class constructor inherits the constructor from a
/// virtual base class (in which case it won't actually invoke it).
std::pair<CXXConstructorDecl *, bool>
findConstructorForBase(CXXRecordDecl *Base, CXXConstructorDecl *Ctor) const {
auto It = InheritedFromBases.find(Base->getCanonicalDecl());
if (It == InheritedFromBases.end())
return std::make_pair(nullptr, false);
// This is an intermediary class.
if (It->second)
return std::make_pair(
S.findInheritingConstructor(UseLoc, Ctor, It->second),
It->second->constructsVirtualBase());
// This is the base class from which the constructor was inherited.
return std::make_pair(Ctor, false);
}
};
/// Is the special member function which would be selected to perform the
/// specified operation on the specified class type a constexpr constructor?
static bool
specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
Sema::CXXSpecialMember CSM, unsigned Quals,
bool ConstRHS,
CXXConstructorDecl *InheritedCtor = nullptr,
Sema::InheritedConstructorInfo *Inherited = nullptr) {
// If we're inheriting a constructor, see if we need to call it for this base
// class.
if (InheritedCtor) {
assert(CSM == Sema::CXXDefaultConstructor);
auto BaseCtor =
Inherited->findConstructorForBase(ClassDecl, InheritedCtor).first;
if (BaseCtor)
return BaseCtor->isConstexpr();
}
if (CSM == Sema::CXXDefaultConstructor)
return ClassDecl->hasConstexprDefaultConstructor();
Sema::SpecialMemberOverloadResult SMOR =
lookupCallFromSpecialMember(S, ClassDecl, CSM, Quals, ConstRHS);
if (!SMOR.getMethod())
// A constructor we wouldn't select can't be "involved in initializing"
// anything.
return true;
return SMOR.getMethod()->isConstexpr();
}
/// Determine whether the specified special member function would be constexpr
/// if it were implicitly defined.
static bool defaultedSpecialMemberIsConstexpr(
Sema &S, CXXRecordDecl *ClassDecl, Sema::CXXSpecialMember CSM,
bool ConstArg, CXXConstructorDecl *InheritedCtor = nullptr,
Sema::InheritedConstructorInfo *Inherited = nullptr) {
if (!S.getLangOpts().CPlusPlus11)
return false;
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor [...]
bool Ctor = true;
switch (CSM) {
case Sema::CXXDefaultConstructor:
if (Inherited)
break;
// Since default constructor lookup is essentially trivial (and cannot
// involve, for instance, template instantiation), we compute whether a
// defaulted default constructor is constexpr directly within CXXRecordDecl.
//
// This is important for performance; we need to know whether the default
// constructor is constexpr to determine whether the type is a literal type.
return ClassDecl->defaultedDefaultConstructorIsConstexpr();
case Sema::CXXCopyConstructor:
case Sema::CXXMoveConstructor:
// For copy or move constructors, we need to perform overload resolution.
break;
case Sema::CXXCopyAssignment:
case Sema::CXXMoveAssignment:
if (!S.getLangOpts().CPlusPlus14)
return false;
// In C++1y, we need to perform overload resolution.
Ctor = false;
break;
case Sema::CXXDestructor:
case Sema::CXXInvalid:
return false;
}
// -- if the class is a non-empty union, or for each non-empty anonymous
// union member of a non-union class, exactly one non-static data member
// shall be initialized; [DR1359]
//
// If we squint, this is guaranteed, since exactly one non-static data member
// will be initialized (if the constructor isn't deleted), we just don't know
// which one.
if (Ctor && ClassDecl->isUnion())
return CSM == Sema::CXXDefaultConstructor
? ClassDecl->hasInClassInitializer() ||
!ClassDecl->hasVariantMembers()
: true;
// -- the class shall not have any virtual base classes;
if (Ctor && ClassDecl->getNumVBases())
return false;
// C++1y [class.copy]p26:
// -- [the class] is a literal type, and
if (!Ctor && !ClassDecl->isLiteral())
return false;
// -- every constructor involved in initializing [...] base class
// sub-objects shall be a constexpr constructor;
// -- the assignment operator selected to copy/move each direct base
// class is a constexpr function, and
for (const auto &B : ClassDecl->bases()) {
const RecordType *BaseType = B.getType()->getAs<RecordType>();
if (!BaseType) continue;
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg,
InheritedCtor, Inherited))
return false;
}
// -- every constructor involved in initializing non-static data members
// [...] shall be a constexpr constructor;
// -- every non-static data member and base class sub-object shall be
// initialized
// -- for each non-static data member of X that is of class type (or array
// thereof), the assignment operator selected to copy/move that member is
// a constexpr function
for (const auto *F : ClassDecl->fields()) {
if (F->isInvalidDecl())
continue;
if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer())
continue;
QualType BaseType = S.Context.getBaseElementType(F->getType());
if (const RecordType *RecordTy = BaseType->getAs<RecordType>()) {
CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM,
BaseType.getCVRQualifiers(),
ConstArg && !F->isMutable()))
return false;
} else if (CSM == Sema::CXXDefaultConstructor) {
return false;
}
}
// All OK, it's constexpr!
return true;
}
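// Two small cases of the rules above (a sketch, assuming C++11):
//
//   struct A { int x = 0; }; // defaulted default ctor is constexpr: the
//                            // in-class initializer is a constant expression
//   struct B : virtual A {}; // virtual base class: the defaulted default
//                            // ctor is never constexpr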
static Sema::ImplicitExceptionSpecification
ComputeDefaultedSpecialMemberExceptionSpec(
Sema &S, SourceLocation Loc, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI);
static Sema::ImplicitExceptionSpecification
computeImplicitExceptionSpec(Sema &S, SourceLocation Loc, CXXMethodDecl *MD) {
auto CSM = S.getSpecialMember(MD);
if (CSM != Sema::CXXInvalid)
return ComputeDefaultedSpecialMemberExceptionSpec(S, Loc, MD, CSM, nullptr);
auto *CD = cast<CXXConstructorDecl>(MD);
assert(CD->getInheritedConstructor() &&
"only special members have implicit exception specs");
Sema::InheritedConstructorInfo ICI(
S, Loc, CD->getInheritedConstructor().getShadowDecl());
return ComputeDefaultedSpecialMemberExceptionSpec(
S, Loc, CD, Sema::CXXDefaultConstructor, &ICI);
}
static FunctionProtoType::ExtProtoInfo getImplicitMethodEPI(Sema &S,
CXXMethodDecl *MD) {
FunctionProtoType::ExtProtoInfo EPI;
// Build an exception specification pointing back at this member.
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = MD;
// Set the calling convention to the default for C++ instance methods.
EPI.ExtInfo = EPI.ExtInfo.withCallingConv(
S.Context.getDefaultCallingConvention(/*IsVariadic=*/false,
/*IsCXXMethod=*/true));
return EPI;
}
void Sema::EvaluateImplicitExceptionSpec(SourceLocation Loc, CXXMethodDecl *MD) {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (FPT->getExceptionSpecType() != EST_Unevaluated)
return;
// Evaluate the exception specification.
auto IES = computeImplicitExceptionSpec(*this, Loc, MD);
auto ESI = IES.getExceptionSpec();
// Update the type of the special member to use it.
UpdateExceptionSpec(MD, ESI);
// A user-provided destructor can be defined outside the class. When that
// happens, be sure to update the exception specification on both
// declarations.
const FunctionProtoType *CanonicalFPT =
MD->getCanonicalDecl()->getType()->castAs<FunctionProtoType>();
if (CanonicalFPT->getExceptionSpecType() == EST_Unevaluated)
UpdateExceptionSpec(MD->getCanonicalDecl(), ESI);
}
void Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD) {
CXXRecordDecl *RD = MD->getParent();
CXXSpecialMember CSM = getSpecialMember(MD);
assert(MD->isExplicitlyDefaulted() && CSM != CXXInvalid &&
"not an explicitly-defaulted special member");
// Whether this was the first-declared instance of the constructor.
// This affects whether we implicitly add an exception spec and constexpr.
bool First = MD == MD->getCanonicalDecl();
bool HadError = false;
// C++11 [dcl.fct.def.default]p1:
// A function that is explicitly defaulted shall
// -- be a special member function (checked elsewhere),
// -- have the same type (except for ref-qualifiers, and except that a
// copy operation can take a non-const reference) as an implicit
// declaration, and
// -- not have default arguments.
unsigned ExpectedParams = 1;
if (CSM == CXXDefaultConstructor || CSM == CXXDestructor)
ExpectedParams = 0;
if (MD->getNumParams() != ExpectedParams) {
// This also checks for default arguments: a copy or move constructor with a
// default argument is classified as a default constructor, and assignment
// operations and destructors can't have default arguments.
Diag(MD->getLocation(), diag::err_defaulted_special_member_params)
<< CSM << MD->getSourceRange();
HadError = true;
} else if (MD->isVariadic()) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_variadic)
<< CSM << MD->getSourceRange();
HadError = true;
}
const FunctionProtoType *Type = MD->getType()->getAs<FunctionProtoType>();
bool CanHaveConstParam = false;
if (CSM == CXXCopyConstructor)
CanHaveConstParam = RD->implicitCopyConstructorHasConstParam();
else if (CSM == CXXCopyAssignment)
CanHaveConstParam = RD->implicitCopyAssignmentHasConstParam();
QualType ReturnType = Context.VoidTy;
if (CSM == CXXCopyAssignment || CSM == CXXMoveAssignment) {
// Check for return type matching.
ReturnType = Type->getReturnType();
QualType ExpectedReturnType =
Context.getLValueReferenceType(Context.getTypeDeclType(RD));
if (!Context.hasSameType(ReturnType, ExpectedReturnType)) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_return_type)
<< (CSM == CXXMoveAssignment) << ExpectedReturnType;
HadError = true;
}
// A defaulted special member cannot have cv-qualifiers.
if (Type->getTypeQuals()) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_quals)
<< (CSM == CXXMoveAssignment) << getLangOpts().CPlusPlus14;
HadError = true;
}
}
// Check for parameter type matching.
QualType ArgType = ExpectedParams ? Type->getParamType(0) : QualType();
bool HasConstParam = false;
if (ExpectedParams && ArgType->isReferenceType()) {
// Argument must be reference to possibly-const T.
QualType ReferentType = ArgType->getPointeeType();
HasConstParam = ReferentType.isConstQualified();
if (ReferentType.isVolatileQualified()) {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_volatile_param) << CSM;
HadError = true;
}
if (HasConstParam && !CanHaveConstParam) {
if (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment) {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_copy_const_param)
<< (CSM == CXXCopyAssignment);
// FIXME: Explain why this special member can't be const.
} else {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_move_const_param)
<< (CSM == CXXMoveAssignment);
}
HadError = true;
}
} else if (ExpectedParams) {
// A copy assignment operator can take its argument by value, but a
// defaulted one cannot.
assert(CSM == CXXCopyAssignment && "unexpected non-ref argument");
Diag(MD->getLocation(), diag::err_defaulted_copy_assign_not_ref);
HadError = true;
}
// C++11 [dcl.fct.def.default]p2:
// An explicitly-defaulted function may be declared constexpr only if it
// would have been implicitly declared as constexpr,
// Do not apply this rule to members of class templates, since core issue 1358
// makes such functions always instantiate to constexpr functions. For
// functions which cannot be constexpr (for non-constructors in C++11 and for
// destructors in C++1y), this is checked elsewhere.
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, RD, CSM,
HasConstParam);
if ((getLangOpts().CPlusPlus14 ? !isa<CXXDestructorDecl>(MD)
: isa<CXXConstructorDecl>(MD)) &&
MD->isConstexpr() && !Constexpr &&
MD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
Diag(MD->getLocStart(), diag::err_incorrect_defaulted_constexpr) << CSM;
// FIXME: Explain why the special member can't be constexpr.
HadError = true;
}
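// For instance (illustrative, not from the original source):
// struct A { int n; constexpr A() = default; };
// is ill-formed in C++11: the implicitly-defined default constructor would
// not be constexpr, because A::n would be left uninitialized.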
// and may have an explicit exception-specification only if it is compatible
// with the exception-specification on the implicit declaration.
if (Type->hasExceptionSpec()) {
// Delay the check if this is the first declaration of the special member,
// since we may not have parsed some necessary in-class initializers yet.
if (First) {
// If the exception specification needs to be instantiated, do so now,
// before we clobber it with an EST_Unevaluated specification below.
if (Type->getExceptionSpecType() == EST_Uninstantiated) {
InstantiateExceptionSpec(MD->getLocStart(), MD);
Type = MD->getType()->getAs<FunctionProtoType>();
}
DelayedDefaultedMemberExceptionSpecs.push_back(std::make_pair(MD, Type));
} else
CheckExplicitlyDefaultedMemberExceptionSpec(MD, Type);
}
// If a function is explicitly defaulted on its first declaration,
if (First) {
// -- it is implicitly considered to be constexpr if the implicit
// definition would be,
MD->setConstexpr(Constexpr);
// -- it is implicitly considered to have the same exception-specification
// as if it had been implicitly declared,
FunctionProtoType::ExtProtoInfo EPI = Type->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = MD;
MD->setType(Context.getFunctionType(ReturnType,
llvm::makeArrayRef(&ArgType,
ExpectedParams),
EPI));
}
if (ShouldDeleteSpecialMember(MD, CSM)) {
if (First) {
SetDeclDeleted(MD, MD->getLocation());
} else {
// C++11 [dcl.fct.def.default]p4:
// [For a] user-provided explicitly-defaulted function [...] if such a
// function is implicitly defined as deleted, the program is ill-formed.
Diag(MD->getLocation(), diag::err_out_of_line_default_deletes) << CSM;
ShouldDeleteSpecialMember(MD, CSM, nullptr, /*Diagnose*/true);
HadError = true;
}
}
if (HadError)
MD->setInvalidDecl();
}
/// Check whether the exception specification provided for an
/// explicitly-defaulted special member matches the exception specification
/// that would have been generated for an implicit special member, per
/// C++11 [dcl.fct.def.default]p2.
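/// For example (illustrative, not from the original source):
/// struct B { ~B() throw(int) = default; };
/// is ill-formed: the implicit destructor would be noexcept, and throw(int)
/// is not compatible with that specification.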
void Sema::CheckExplicitlyDefaultedMemberExceptionSpec(
CXXMethodDecl *MD, const FunctionProtoType *SpecifiedType) {
// If the exception specification was explicitly specified but hadn't been
// parsed when the method was defaulted, grab it now.
if (SpecifiedType->getExceptionSpecType() == EST_Unparsed)
SpecifiedType =
MD->getTypeSourceInfo()->getType()->castAs<FunctionProtoType>();
// Compute the implicit exception specification.
CallingConv CC = Context.getDefaultCallingConvention(/*IsVariadic=*/false,
/*IsCXXMethod=*/true);
FunctionProtoType::ExtProtoInfo EPI(CC);
auto IES = computeImplicitExceptionSpec(*this, MD->getLocation(), MD);
EPI.ExceptionSpec = IES.getExceptionSpec();
const FunctionProtoType *ImplicitType = cast<FunctionProtoType>(
Context.getFunctionType(Context.VoidTy, None, EPI));
// Ensure that it matches.
CheckEquivalentExceptionSpec(
PDiag(diag::err_incorrect_defaulted_exception_spec)
<< getSpecialMember(MD), PDiag(),
ImplicitType, SourceLocation(),
SpecifiedType, MD->getLocation());
}
void Sema::CheckDelayedMemberExceptionSpecs() {
decltype(DelayedExceptionSpecChecks) Checks;
decltype(DelayedDefaultedMemberExceptionSpecs) Specs;
std::swap(Checks, DelayedExceptionSpecChecks);
std::swap(Specs, DelayedDefaultedMemberExceptionSpecs);
// Perform any deferred checking of exception specifications for virtual
// destructors.
for (auto &Check : Checks)
CheckOverridingFunctionExceptionSpec(Check.first, Check.second);
// Check that any explicitly-defaulted methods have exception specifications
// compatible with their implicit exception specifications.
for (auto &Spec : Specs)
CheckExplicitlyDefaultedMemberExceptionSpec(Spec.first, Spec.second);
}
namespace {
/// CRTP base class for visiting operations performed by a special member
/// function (or inherited constructor).
template<typename Derived>
struct SpecialMemberVisitor {
Sema &S;
CXXMethodDecl *MD;
Sema::CXXSpecialMember CSM;
Sema::InheritedConstructorInfo *ICI;
// Properties of the special member, computed for convenience.
bool IsConstructor = false, IsAssignment = false, ConstArg = false;
SpecialMemberVisitor(Sema &S, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI)
: S(S), MD(MD), CSM(CSM), ICI(ICI) {
switch (CSM) {
case Sema::CXXDefaultConstructor:
case Sema::CXXCopyConstructor:
case Sema::CXXMoveConstructor:
IsConstructor = true;
break;
case Sema::CXXCopyAssignment:
case Sema::CXXMoveAssignment:
IsAssignment = true;
break;
case Sema::CXXDestructor:
break;
case Sema::CXXInvalid:
llvm_unreachable("invalid special member kind");
}
if (MD->getNumParams()) {
if (const ReferenceType *RT =
MD->getParamDecl(0)->getType()->getAs<ReferenceType>())
ConstArg = RT->getPointeeType().isConstQualified();
}
}
Derived &getDerived() { return static_cast<Derived&>(*this); }
/// Is this a "move" special member?
bool isMove() const {
return CSM == Sema::CXXMoveConstructor || CSM == Sema::CXXMoveAssignment;
}
/// Look up the corresponding special member in the given class.
Sema::SpecialMemberOverloadResult lookupIn(CXXRecordDecl *Class,
unsigned Quals, bool IsMutable) {
return lookupCallFromSpecialMember(S, Class, CSM, Quals,
ConstArg && !IsMutable);
}
/// Look up the constructor for the specified base class to see if it's
/// overridden due to this being an inherited constructor.
Sema::SpecialMemberOverloadResult lookupInheritedCtor(CXXRecordDecl *Class) {
if (!ICI)
return {};
assert(CSM == Sema::CXXDefaultConstructor);
auto *BaseCtor =
cast<CXXConstructorDecl>(MD)->getInheritedConstructor().getConstructor();
if (auto *MD = ICI->findConstructorForBase(Class, BaseCtor).first)
return MD;
return {};
}
/// A base or member subobject.
typedef llvm::PointerUnion<CXXBaseSpecifier*, FieldDecl*> Subobject;
/// Get the location to use for a subobject in diagnostics.
static SourceLocation getSubobjectLoc(Subobject Subobj) {
// FIXME: For an indirect virtual base, the direct base leading to
// the indirect virtual base would be a more useful choice.
if (auto *B = Subobj.dyn_cast<CXXBaseSpecifier*>())
return B->getBaseTypeLoc();
else
return Subobj.get<FieldDecl*>()->getLocation();
}
enum BasesToVisit {
/// Visit all non-virtual (direct) bases.
VisitNonVirtualBases,
/// Visit all direct bases, virtual or not.
VisitDirectBases,
/// Visit all non-virtual bases, and all virtual bases if the class
/// is not abstract.
VisitPotentiallyConstructedBases,
/// Visit all direct or virtual bases.
VisitAllBases
};
// Visit the bases and members of the class.
bool visit(BasesToVisit Bases) {
CXXRecordDecl *RD = MD->getParent();
if (Bases == VisitPotentiallyConstructedBases)
Bases = RD->isAbstract() ? VisitNonVirtualBases : VisitAllBases;
for (auto &B : RD->bases())
if ((Bases == VisitDirectBases || !B.isVirtual()) &&
getDerived().visitBase(&B))
return true;
if (Bases == VisitAllBases)
for (auto &B : RD->vbases())
if (getDerived().visitBase(&B))
return true;
for (auto *F : RD->fields())
if (!F->isInvalidDecl() && !F->isUnnamedBitfield() &&
getDerived().visitField(F))
return true;
return false;
}
};
}
namespace {
struct SpecialMemberDeletionInfo
: SpecialMemberVisitor<SpecialMemberDeletionInfo> {
bool Diagnose;
SourceLocation Loc;
bool AllFieldsAreConst;
SpecialMemberDeletionInfo(Sema &S, CXXMethodDecl *MD,
Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI, bool Diagnose)
: SpecialMemberVisitor(S, MD, CSM, ICI), Diagnose(Diagnose),
Loc(MD->getLocation()), AllFieldsAreConst(true) {}
bool inUnion() const { return MD->getParent()->isUnion(); }
Sema::CXXSpecialMember getEffectiveCSM() {
return ICI ? Sema::CXXInvalid : CSM;
}
bool visitBase(CXXBaseSpecifier *Base) { return shouldDeleteForBase(Base); }
bool visitField(FieldDecl *Field) { return shouldDeleteForField(Field); }
bool shouldDeleteForBase(CXXBaseSpecifier *Base);
bool shouldDeleteForField(FieldDecl *FD);
bool shouldDeleteForAllConstMembers();
bool shouldDeleteForClassSubobject(CXXRecordDecl *Class, Subobject Subobj,
unsigned Quals);
bool shouldDeleteForSubobjectCall(Subobject Subobj,
Sema::SpecialMemberOverloadResult SMOR,
bool IsDtorCallInCtor);
bool isAccessible(Subobject Subobj, CXXMethodDecl *D);
};
}
/// Is the given special member accessible when used on the given
/// subobject?
bool SpecialMemberDeletionInfo::isAccessible(Subobject Subobj,
CXXMethodDecl *target) {
// If we're operating on a base class, the object type is the type of
// the class containing this special member.
QualType objectTy;
AccessSpecifier access = target->getAccess();
if (CXXBaseSpecifier *base = Subobj.dyn_cast<CXXBaseSpecifier*>()) {
objectTy = S.Context.getTypeDeclType(MD->getParent());
access = CXXRecordDecl::MergeAccess(base->getAccessSpecifier(), access);
// If we're operating on a field, the object type is the type of the field.
} else {
objectTy = S.Context.getTypeDeclType(target->getParent());
}
return S.isSpecialMemberAccessibleForDeletion(target, access, objectTy);
}
/// Check whether we should delete a special member due to the implicit
/// definition containing a call to a special member of a subobject.
bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
Subobject Subobj, Sema::SpecialMemberOverloadResult SMOR,
bool IsDtorCallInCtor) {
CXXMethodDecl *Decl = SMOR.getMethod();
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
int DiagKind = -1;
if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::NoMemberOrDeleted)
DiagKind = !Decl ? 0 : 1;
else if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
DiagKind = 2;
else if (!isAccessible(Subobj, Decl))
DiagKind = 3;
else if (!IsDtorCallInCtor && Field && Field->getParent()->isUnion() &&
!Decl->isTrivial()) {
// A member of a union must have a trivial corresponding special member.
// As a weird special case, a destructor call from a union's constructor
// must be accessible and non-deleted, but need not be trivial. Such a
// destructor is never actually called, but is semantically checked as
// if it were.
DiagKind = 4;
}
if (DiagKind == -1)
return false;
if (Diagnose) {
if (Field) {
S.Diag(Field->getLocation(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/true
<< Field << DiagKind << IsDtorCallInCtor;
} else {
CXXBaseSpecifier *Base = Subobj.get<CXXBaseSpecifier*>();
S.Diag(Base->getLocStart(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/false
<< Base->getType() << DiagKind << IsDtorCallInCtor;
}
if (DiagKind == 1)
S.NoteDeletedFunction(Decl);
// FIXME: Explain inaccessibility if DiagKind == 3.
}
return true;
}
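// An illustrative case for the union rule above (not from the original
// source):
// struct NT { NT(); }; // non-trivial default constructor
// union U { NT nt; }; // U's default constructor is implicitly deleted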
/// Check whether we should delete a special member function due to having a
/// direct or virtual base class or non-static data member of class type M.
bool SpecialMemberDeletionInfo::shouldDeleteForClassSubobject(
CXXRecordDecl *Class, Subobject Subobj, unsigned Quals) {
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
bool IsMutable = Field && Field->isMutable();
// C++11 [class.ctor]p5:
// -- any direct or virtual base class, or non-static data member with no
// brace-or-equal-initializer, has class type M (or array thereof) and
// either M has no default constructor or overload resolution as applied
// to M's default constructor results in an ambiguity or in a function
// that is deleted or inaccessible
// C++11 [class.copy]p11, C++11 [class.copy]p23:
// -- a direct or virtual base class B that cannot be copied/moved because
// overload resolution, as applied to B's corresponding special member,
// results in an ambiguity or a function that is deleted or inaccessible
// from the defaulted special member
// C++11 [class.dtor]p5:
// -- any direct or virtual base class [...] has a type with a destructor
// that is deleted or inaccessible
if (!(CSM == Sema::CXXDefaultConstructor &&
Field && Field->hasInClassInitializer()) &&
shouldDeleteForSubobjectCall(Subobj, lookupIn(Class, Quals, IsMutable),
false))
return true;
// C++11 [class.ctor]p5, C++11 [class.copy]p11:
// -- any direct or virtual base class or non-static data member has a
// type with a destructor that is deleted or inaccessible
if (IsConstructor) {
Sema::SpecialMemberOverloadResult SMOR =
S.LookupSpecialMember(Class, Sema::CXXDestructor,
false, false, false, false, false);
if (shouldDeleteForSubobjectCall(Subobj, SMOR, true))
return true;
}
return false;
}
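// For example (illustrative, not from the original source):
// struct D { D() = delete; };
// struct E { D d; }; // E::E() is implicitly deleted
// class F { ~F(); }; // private destructor
// struct G { F f; }; // G::~G() and G::G() are implicitly deleted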
/// Check whether we should delete a special member function due to the class
/// having a particular direct or virtual base class.
bool SpecialMemberDeletionInfo::shouldDeleteForBase(CXXBaseSpecifier *Base) {
CXXRecordDecl *BaseClass = Base->getType()->getAsCXXRecordDecl();
// If the program is correct, BaseClass cannot be null; if it is, the error
// must be reported elsewhere.
if (!BaseClass)
return false;
// If we have an inheriting constructor, check whether we're calling an
// inherited constructor instead of a default constructor.
Sema::SpecialMemberOverloadResult SMOR = lookupInheritedCtor(BaseClass);
if (auto *BaseCtor = SMOR.getMethod()) {
// Note that we do not check access along this path; other than that,
// this is the same as shouldDeleteForSubobjectCall(Base, BaseCtor, false);
// FIXME: Check that the base has a usable destructor! Sink this into
// shouldDeleteForClassSubobject.
if (BaseCtor->isDeleted() && Diagnose) {
S.Diag(Base->getLocStart(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/false
<< Base->getType() << /*Deleted*/1 << /*IsDtorCallInCtor*/false;
S.NoteDeletedFunction(BaseCtor);
}
return BaseCtor->isDeleted();
}
return shouldDeleteForClassSubobject(BaseClass, Base, 0);
}
/// Check whether we should delete a special member function due to the class
/// having a particular non-static data member.
bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
QualType FieldType = S.Context.getBaseElementType(FD->getType());
CXXRecordDecl *FieldRecord = FieldType->getAsCXXRecordDecl();
if (CSM == Sema::CXXDefaultConstructor) {
// For a default constructor, all references must be initialized in-class
// and, if a union, it must have a non-const member.
if (FieldType->isReferenceType() && !FD->hasInClassInitializer()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
<< !!ICI << MD->getParent() << FD << FieldType << /*Reference*/0;
return true;
}
// C++11 [class.ctor]p5: any non-variant non-static data member of
// const-qualified type (or array thereof) with no
// brace-or-equal-initializer does not have a user-provided default
// constructor.
if (!inUnion() && FieldType.isConstQualified() &&
!FD->hasInClassInitializer() &&
(!FieldRecord || !FieldRecord->hasUserProvidedDefaultConstructor())) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
<< !!ICI << MD->getParent() << FD << FD->getType() << /*Const*/1;
return true;
}
if (inUnion() && !FieldType.isConstQualified())
AllFieldsAreConst = false;
} else if (CSM == Sema::CXXCopyConstructor) {
// For a copy constructor, data members must not be of rvalue reference
// type.
if (FieldType->isRValueReferenceType()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_copy_ctor_rvalue_reference)
<< MD->getParent() << FD << FieldType;
return true;
}
} else if (IsAssignment) {
// For an assignment operator, data members must not be of reference type.
if (FieldType->isReferenceType()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_assign_field)
<< isMove() << MD->getParent() << FD << FieldType << /*Reference*/0;
return true;
}
if (!FieldRecord && FieldType.isConstQualified()) {
// C++11 [class.copy]p23:
// -- a non-static data member of const non-class type (or array thereof)
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_assign_field)
<< isMove() << MD->getParent() << FD << FD->getType() << /*Const*/1;
return true;
}
}
if (FieldRecord) {
// Some additional restrictions exist on the variant members.
if (!inUnion() && FieldRecord->isUnion() &&
FieldRecord->isAnonymousStructOrUnion()) {
bool AllVariantFieldsAreConst = true;
// FIXME: Handle anonymous unions declared within anonymous unions.
for (auto *UI : FieldRecord->fields()) {
QualType UnionFieldType = S.Context.getBaseElementType(UI->getType());
if (!UnionFieldType.isConstQualified())
AllVariantFieldsAreConst = false;
CXXRecordDecl *UnionFieldRecord = UnionFieldType->getAsCXXRecordDecl();
if (UnionFieldRecord &&
shouldDeleteForClassSubobject(UnionFieldRecord, UI,
UnionFieldType.getCVRQualifiers()))
return true;
}
// At least one member in each anonymous union must be non-const
if (CSM == Sema::CXXDefaultConstructor && AllVariantFieldsAreConst &&
!FieldRecord->field_empty()) {
if (Diagnose)
S.Diag(FieldRecord->getLocation(),
diag::note_deleted_default_ctor_all_const)
<< !!ICI << MD->getParent() << /*anonymous union*/1;
return true;
}
// Don't check the implicit member of the anonymous union type.
// This is technically non-conformant, but sanity demands it.
return false;
}
if (shouldDeleteForClassSubobject(FieldRecord, FD,
FieldType.getCVRQualifiers()))
return true;
}
return false;
}
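// Illustrative examples of fields that delete special members (not from the
// original source):
// struct F1 { int &r; }; // default ctor deleted: uninitialized reference
// struct F2 { const int c; }; // default ctor deleted: const, no initializer
// struct F3 { int &&rr; }; // copy ctor deleted: rvalue reference member
// struct F4 { int &r; F4(int &x) : r(x) {} }; // copy assignment deleted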
/// C++11 [class.ctor] p5:
/// A defaulted default constructor for a class X is defined as deleted if
/// X is a union and all of its variant members are of const-qualified type.
bool SpecialMemberDeletionInfo::shouldDeleteForAllConstMembers() {
// This is a silly definition, because it gives an empty union a deleted
// default constructor. Don't do that.
if (CSM == Sema::CXXDefaultConstructor && inUnion() && AllFieldsAreConst) {
bool AnyFields = false;
for (auto *F : MD->getParent()->fields())
if ((AnyFields = !F->isUnnamedBitfield()))
break;
if (!AnyFields)
return false;
if (Diagnose)
S.Diag(MD->getParent()->getLocation(),
diag::note_deleted_default_ctor_all_const)
<< !!ICI << MD->getParent() << /*not anonymous union*/0;
return true;
}
return false;
}
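// e.g. (illustrative, not from the original source):
// union U { const int k; }; // U's default constructor is deleted, since
// every variant member is of const-qualified type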
/// Determine whether a defaulted special member function should be defined as
/// deleted, as specified in C++11 [class.ctor]p5, C++11 [class.copy]p11,
/// C++11 [class.copy]p23, and C++11 [class.dtor]p5.
bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD, CXXSpecialMember CSM,
InheritedConstructorInfo *ICI,
bool Diagnose) {
if (MD->isInvalidDecl())
return false;
CXXRecordDecl *RD = MD->getParent();
assert(!RD->isDependentType() && "do deletion after instantiation");
if (!LangOpts.CPlusPlus11 || RD->isInvalidDecl())
return false;
// C++11 [expr.lambda.prim]p19:
// The closure type associated with a lambda-expression has a
// deleted (8.4.3) default constructor and a deleted copy
// assignment operator.
if (RD->isLambda() &&
(CSM == CXXDefaultConstructor || CSM == CXXCopyAssignment)) {
if (Diagnose)
Diag(RD->getLocation(), diag::note_lambda_decl);
return true;
}
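// For example (illustrative, not from the original source):
// auto L = []{};
// decltype(L) M; // error: the closure type's default constructor is deleted
// L = []{}; // error: the closure type's copy assignment is deleted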
// For an anonymous struct or union, the copy and assignment special members
// will never be used, so skip the check. For an anonymous union declared at
// namespace scope, the constructor and destructor are used.
if (CSM != CXXDefaultConstructor && CSM != CXXDestructor &&
RD->isAnonymousStructOrUnion())
return false;
// C++11 [class.copy]p7, p18:
// If the class definition declares a move constructor or move assignment
// operator, an implicitly declared copy constructor or copy assignment
// operator is defined as deleted.
if (MD->isImplicit() &&
(CSM == CXXCopyConstructor || CSM == CXXCopyAssignment)) {
CXXMethodDecl *UserDeclaredMove = nullptr;
// In Microsoft mode up to MSVC 2013, a user-declared move only causes the
// deletion of the corresponding copy operation, not both copy operations.
// MSVC 2015 adopted the standards-conforming behavior.
bool DeletesOnlyMatchingCopy =
getLangOpts().MSVCCompat &&
!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015);
if (RD->hasUserDeclaredMoveConstructor() &&
(!DeletesOnlyMatchingCopy || CSM == CXXCopyConstructor)) {
if (!Diagnose) return true;
// Find any user-declared move constructor.
for (auto *I : RD->ctors()) {
if (I->isMoveConstructor()) {
UserDeclaredMove = I;
break;
}
}
assert(UserDeclaredMove);
} else if (RD->hasUserDeclaredMoveAssignment() &&
(!DeletesOnlyMatchingCopy || CSM == CXXCopyAssignment)) {
if (!Diagnose) return true;
// Find any user-declared move assignment operator.
for (auto *I : RD->methods()) {
if (I->isMoveAssignmentOperator()) {
UserDeclaredMove = I;
break;
}
}
assert(UserDeclaredMove);
}
if (UserDeclaredMove) {
Diag(UserDeclaredMove->getLocation(),
diag::note_deleted_copy_user_declared_move)
<< (CSM == CXXCopyAssignment) << RD
<< UserDeclaredMove->isMoveAssignmentOperator();
return true;
}
}
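// Illustrative example (not from the original source):
// struct M { M(M &&); }; // user-declared move constructor
// void f(const M &a) { M b(a); } // error: M's copy constructor is deleted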
// Do access control from the special member function
ContextRAII MethodContext(*this, MD);
// C++11 [class.dtor]p5:
// -- for a virtual destructor, lookup of the non-array deallocation function
// results in an ambiguity or in a function that is deleted or inaccessible
if (CSM == CXXDestructor && MD->isVirtual()) {
FunctionDecl *OperatorDelete = nullptr;
DeclarationName Name =
Context.DeclarationNames.getCXXOperatorName(OO_Delete);
if (FindDeallocationFunction(MD->getLocation(), MD->getParent(), Name,
OperatorDelete, /*Diagnose*/false)) {
if (Diagnose)
Diag(RD->getLocation(), diag::note_deleted_dtor_no_operator_delete);
return true;
}
}
SpecialMemberDeletionInfo SMI(*this, MD, CSM, ICI, Diagnose);
// Per DR1611, do not consider virtual bases of constructors of abstract
// classes, since we are not going to construct them.
// Per DR1658, do not consider virtual bases of destructors of abstract
// classes either.
// Per DR2180, for assignment operators we only assign (and thus only
// consider) direct bases.
if (SMI.visit(SMI.IsAssignment ? SMI.VisitDirectBases
: SMI.VisitPotentiallyConstructedBases))
return true;
if (SMI.shouldDeleteForAllConstMembers())
return true;
if (getLangOpts().CUDA) {
// We should delete the special member in CUDA mode if target inference
// failed.
return inferCUDATargetForImplicitSpecialMember(RD, CSM, MD, SMI.ConstArg,
Diagnose);
}
return false;
}
/// Perform lookup for a special member of the specified kind, and determine
/// whether it is trivial. If the triviality can be determined without the
/// lookup, skip it. This is intended for use when determining whether a
/// special member of a containing object is trivial, and thus does not ever
/// perform overload resolution for default constructors.
///
/// If \p Selected is not \c NULL, \c *Selected will be filled in with the
/// member that was most likely to be intended to be trivial, if any.
static bool findTrivialSpecialMember(Sema &S, CXXRecordDecl *RD,
Sema::CXXSpecialMember CSM, unsigned Quals,
bool ConstRHS, CXXMethodDecl **Selected) {
if (Selected)
*Selected = nullptr;
switch (CSM) {
case Sema::CXXInvalid:
llvm_unreachable("not a special member");
case Sema::CXXDefaultConstructor:
// C++11 [class.ctor]p5:
// A default constructor is trivial if:
// - all the [direct subobjects] have trivial default constructors
//
// Note, no overload resolution is performed in this case.
if (RD->hasTrivialDefaultConstructor())
return true;
if (Selected) {
// If there's a default constructor which could have been trivial, dig it
// out. Otherwise, if there's any user-provided default constructor, point
// to that as an example of why there's not a trivial one.
CXXConstructorDecl *DefCtor = nullptr;
if (RD->needsImplicitDefaultConstructor())
S.DeclareImplicitDefaultConstructor(RD);
for (auto *CI : RD->ctors()) {
if (!CI->isDefaultConstructor())
continue;
DefCtor = CI;
if (!DefCtor->isUserProvided())
break;
}
*Selected = DefCtor;
}
return false;
case Sema::CXXDestructor:
// C++11 [class.dtor]p5:
// A destructor is trivial if:
// - all the direct [subobjects] have trivial destructors
if (RD->hasTrivialDestructor())
return true;
if (Selected) {
if (RD->needsImplicitDestructor())
S.DeclareImplicitDestructor(RD);
*Selected = RD->getDestructor();
}
return false;
case Sema::CXXCopyConstructor:
// C++11 [class.copy]p12:
// A copy constructor is trivial if:
// - the constructor selected to copy each direct [subobject] is trivial
if (RD->hasTrivialCopyConstructor()) {
if (Quals == Qualifiers::Const)
// We must either select the trivial copy constructor or reach an
// ambiguity; no need to actually perform overload resolution.
return true;
} else if (!Selected) {
return false;
}
// In C++98, we are not supposed to perform overload resolution here, but we
// treat that as a language defect, as suggested on cxx-abi-dev, and treat
// classes like B below as having a non-trivial copy constructor:
// struct A { template<typename T> A(T&); };
// struct B { mutable A a; };
goto NeedOverloadResolution;
case Sema::CXXCopyAssignment:
// C++11 [class.copy]p25:
// A copy assignment operator is trivial if:
// - the assignment operator selected to copy each direct [subobject] is
// trivial
if (RD->hasTrivialCopyAssignment()) {
if (Quals == Qualifiers::Const)
return true;
} else if (!Selected) {
return false;
}
// In C++98, we are not supposed to perform overload resolution here, but we
// treat that as a language defect.
goto NeedOverloadResolution;
case Sema::CXXMoveConstructor:
case Sema::CXXMoveAssignment:
NeedOverloadResolution:
Sema::SpecialMemberOverloadResult SMOR =
lookupCallFromSpecialMember(S, RD, CSM, Quals, ConstRHS);
// The standard doesn't describe how to behave if the lookup is ambiguous.
// We treat it as not making the member non-trivial, just like the standard
// mandates for the default constructor. This should rarely matter, because
// the member will also be deleted.
if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
return true;
if (!SMOR.getMethod()) {
assert(SMOR.getKind() ==
Sema::SpecialMemberOverloadResult::NoMemberOrDeleted);
return false;
}
// We deliberately don't check if we found a deleted special member. We're
// not supposed to!
if (Selected)
*Selected = SMOR.getMethod();
return SMOR.getMethod()->isTrivial();
}
llvm_unreachable("unknown special method kind");
}
static CXXConstructorDecl *findUserDeclaredCtor(CXXRecordDecl *RD) {
for (auto *CI : RD->ctors())
if (!CI->isImplicit())
return CI;
// Look for constructor templates.
typedef CXXRecordDecl::specific_decl_iterator<FunctionTemplateDecl> tmpl_iter;
for (tmpl_iter TI(RD->decls_begin()), TE(RD->decls_end()); TI != TE; ++TI) {
if (CXXConstructorDecl *CD =
dyn_cast<CXXConstructorDecl>(TI->getTemplatedDecl()))
return CD;
}
return nullptr;
}
/// The kind of subobject we are checking for triviality. The values of this
/// enumeration are used in diagnostics.
enum TrivialSubobjectKind {
/// The subobject is a base class.
TSK_BaseClass,
/// The subobject is a non-static data member.
TSK_Field,
/// The object is actually the complete object.
TSK_CompleteObject
};
/// Check whether the special member selected for a given type would be trivial.
static bool checkTrivialSubobjectCall(Sema &S, SourceLocation SubobjLoc,
QualType SubType, bool ConstRHS,
Sema::CXXSpecialMember CSM,
TrivialSubobjectKind Kind,
bool Diagnose) {
CXXRecordDecl *SubRD = SubType->getAsCXXRecordDecl();
if (!SubRD)
return true;
CXXMethodDecl *Selected;
if (findTrivialSpecialMember(S, SubRD, CSM, SubType.getCVRQualifiers(),
ConstRHS, Diagnose ? &Selected : nullptr))
return true;
if (Diagnose) {
if (ConstRHS)
SubType.addConst();
if (!Selected && CSM == Sema::CXXDefaultConstructor) {
S.Diag(SubobjLoc, diag::note_nontrivial_no_def_ctor)
<< Kind << SubType.getUnqualifiedType();
if (CXXConstructorDecl *CD = findUserDeclaredCtor(SubRD))
S.Diag(CD->getLocation(), diag::note_user_declared_ctor);
} else if (!Selected)
S.Diag(SubobjLoc, diag::note_nontrivial_no_copy)
<< Kind << SubType.getUnqualifiedType() << CSM << SubType;
else if (Selected->isUserProvided()) {
if (Kind == TSK_CompleteObject)
S.Diag(Selected->getLocation(), diag::note_nontrivial_user_provided)
<< Kind << SubType.getUnqualifiedType() << CSM;
else {
S.Diag(SubobjLoc, diag::note_nontrivial_user_provided)
<< Kind << SubType.getUnqualifiedType() << CSM;
S.Diag(Selected->getLocation(), diag::note_declared_at);
}
} else {
if (Kind != TSK_CompleteObject)
S.Diag(SubobjLoc, diag::note_nontrivial_subobject)
<< Kind << SubType.getUnqualifiedType() << CSM;
// Explain why the defaulted or deleted special member isn't trivial.
S.SpecialMemberIsTrivial(Selected, CSM, Diagnose);
}
}
return false;
}
/// Check whether the members of a class type allow a special member to be
/// trivial.
static bool checkTrivialClassMembers(Sema &S, CXXRecordDecl *RD,
Sema::CXXSpecialMember CSM,
bool ConstArg, bool Diagnose) {
for (const auto *FI : RD->fields()) {
if (FI->isInvalidDecl() || FI->isUnnamedBitfield())
continue;
QualType FieldType = S.Context.getBaseElementType(FI->getType());
// Pretend anonymous struct or union members are members of this class.
if (FI->isAnonymousStructOrUnion()) {
if (!checkTrivialClassMembers(S, FieldType->getAsCXXRecordDecl(),
CSM, ConstArg, Diagnose))
return false;
continue;
}
// C++11 [class.ctor]p5:
// A default constructor is trivial if [...]
// -- no non-static data member of its class has a
// brace-or-equal-initializer
if (CSM == Sema::CXXDefaultConstructor && FI->hasInClassInitializer()) {
if (Diagnose)
S.Diag(FI->getLocation(), diag::note_nontrivial_in_class_init) << FI;
return false;
}
// Objective-C ARC 4.3.5:
// [...] nontrivially ownership-qualified types are [...] not trivially
// default constructible, copy constructible, move constructible, copy
// assignable, move assignable, or destructible [...]
if (FieldType.hasNonTrivialObjCLifetime()) {
if (Diagnose)
S.Diag(FI->getLocation(), diag::note_nontrivial_objc_ownership)
<< RD << FieldType.getObjCLifetime();
return false;
}
bool ConstRHS = ConstArg && !FI->isMutable();
if (!checkTrivialSubobjectCall(S, FI->getLocation(), FieldType, ConstRHS,
CSM, TSK_Field, Diagnose))
return false;
}
return true;
}
/// Diagnose why the specified class does not have a trivial special member of
/// the given kind.
void Sema::DiagnoseNontrivial(const CXXRecordDecl *RD, CXXSpecialMember CSM) {
QualType Ty = Context.getRecordType(RD);
bool ConstArg = (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment);
checkTrivialSubobjectCall(*this, RD->getLocation(), Ty, ConstArg, CSM,
TSK_CompleteObject, /*Diagnose*/true);
}
/// Determine whether a defaulted or deleted special member function is trivial,
/// as specified in C++11 [class.ctor]p5, C++11 [class.copy]p12,
/// C++11 [class.copy]p25, and C++11 [class.dtor]p5.
bool Sema::SpecialMemberIsTrivial(CXXMethodDecl *MD, CXXSpecialMember CSM,
bool Diagnose) {
assert(!MD->isUserProvided() && CSM != CXXInvalid && "not special enough");
CXXRecordDecl *RD = MD->getParent();
bool ConstArg = false;
// C++11 [class.copy]p12, p25: [DR1593]
// A [special member] is trivial if [...] its parameter-type-list is
// equivalent to the parameter-type-list of an implicit declaration [...]
switch (CSM) {
case CXXDefaultConstructor:
case CXXDestructor:
// Trivial default constructors and destructors cannot have parameters.
break;
case CXXCopyConstructor:
case CXXCopyAssignment: {
// Trivial copy operations always have const, non-volatile parameter types.
ConstArg = true;
const ParmVarDecl *Param0 = MD->getParamDecl(0);
const ReferenceType *RT = Param0->getType()->getAs<ReferenceType>();
if (!RT || RT->getPointeeType().getCVRQualifiers() != Qualifiers::Const) {
if (Diagnose)
Diag(Param0->getLocation(), diag::note_nontrivial_param_type)
<< Param0->getSourceRange() << Param0->getType()
<< Context.getLValueReferenceType(
Context.getRecordType(RD).withConst());
return false;
}
break;
}
case CXXMoveConstructor:
case CXXMoveAssignment: {
// Trivial move operations always have non-cv-qualified parameters.
const ParmVarDecl *Param0 = MD->getParamDecl(0);
const RValueReferenceType *RT =
Param0->getType()->getAs<RValueReferenceType>();
if (!RT || RT->getPointeeType().getCVRQualifiers()) {
if (Diagnose)
Diag(Param0->getLocation(), diag::note_nontrivial_param_type)
<< Param0->getSourceRange() << Param0->getType()
<< Context.getRValueReferenceType(Context.getRecordType(RD));
return false;
}
break;
}
case CXXInvalid:
llvm_unreachable("not a special member");
}
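// For example (illustrative, not from the original source), a subobject can
// force the copy constructor's parameter type to be 'X &' rather than
// 'const X &', making it non-trivial per the check above:
// struct A { A(A &); }; // copy constructor takes a non-const reference
// struct B { A a; }; // B's implicit copy constructor has type B(B &)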
if (MD->getMinRequiredArguments() < MD->getNumParams()) {
if (Diagnose)
Diag(MD->getParamDecl(MD->getMinRequiredArguments())->getLocation(),
diag::note_nontrivial_default_arg)
<< MD->getParamDecl(MD->getMinRequiredArguments())->getSourceRange();
return false;
}
if (MD->isVariadic()) {
if (Diagnose)
Diag(MD->getLocation(), diag::note_nontrivial_variadic);
return false;
}
// C++11 [class.ctor]p5, C++11 [class.dtor]p5:
// A copy/move [constructor or assignment operator] is trivial if
// -- the [member] selected to copy/move each direct base class subobject
// is trivial
//
// C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor or destructor] is trivial if
// -- all the direct base classes have trivial [default constructors or
// destructors]
for (const auto &BI : RD->bases())
if (!checkTrivialSubobjectCall(*this, BI.getLocStart(), BI.getType(),
ConstArg, CSM, TSK_BaseClass, Diagnose))
return false;
// C++11 [class.ctor]p5, C++11 [class.dtor]p5:
// A copy/move [constructor or assignment operator] for a class X is
// trivial if
// -- for each non-static data member of X that is of class type (or array
// thereof), the constructor selected to copy/move that member is
// trivial
//
// C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor or destructor] is trivial if
// -- for all of the non-static data members of its class that are of class
// type (or array thereof), each such class has a trivial [default
// constructor or destructor]
if (!checkTrivialClassMembers(*this, RD, CSM, ConstArg, Diagnose))
return false;
// C++11 [class.dtor]p5:
// A destructor is trivial if [...]
// -- the destructor is not virtual
if (CSM == CXXDestructor && MD->isVirtual()) {
if (Diagnose)
Diag(MD->getLocation(), diag::note_nontrivial_virtual_dtor) << RD;
return false;
}
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [special member] for class X is trivial if [...]
// -- class X has no virtual functions and no virtual base classes
if (CSM != CXXDestructor && MD->getParent()->isDynamicClass()) {
if (!Diagnose)
return false;
if (RD->getNumVBases()) {
// Check for virtual bases. We already know that the corresponding
// member in all bases is trivial, so vbases must all be direct.
CXXBaseSpecifier &BS = *RD->vbases_begin();
assert(BS.isVirtual());
Diag(BS.getLocStart(), diag::note_nontrivial_has_virtual) << RD << 1;
return false;
}
// Must have a virtual method.
for (const auto *MI : RD->methods()) {
if (MI->isVirtual()) {
SourceLocation MLoc = MI->getLocStart();
Diag(MLoc, diag::note_nontrivial_has_virtual) << RD << 0;
return false;
}
}
llvm_unreachable("dynamic class with no vbases and no virtual functions");
}
// Looks like it's trivial!
return true;
}
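// e.g. (illustrative, not from the original source):
// struct P { virtual ~P(); }; // ~P() is non-trivial: it is virtual
// struct Q : P { }; // Q is a dynamic class, so its constructors and
// copy/move operations are not trivial either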
namespace {
struct FindHiddenVirtualMethod {
Sema *S;
CXXMethodDecl *Method;
llvm::SmallPtrSet<const CXXMethodDecl *, 8> OverridenAndUsingBaseMethods;
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
private:
/// Check whether any most-overridden method from MD is in Methods.
static bool CheckMostOverridenMethods(
const CXXMethodDecl *MD,
const llvm::SmallPtrSetImpl<const CXXMethodDecl *> &Methods) {
if (MD->size_overridden_methods() == 0)
return Methods.count(MD->getCanonicalDecl());
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I)
if (CheckMostOverridenMethods(*I, Methods))
return true;
return false;
}
public:
/// Member lookup function that determines whether a given C++
/// method overloads virtual methods in a base class without overriding any,
/// to be used with CXXRecordDecl::lookupInBases().
bool operator()(const CXXBaseSpecifier *Specifier, CXXBasePath &Path) {
RecordDecl *BaseRecord =
Specifier->getType()->getAs<RecordType>()->getDecl();
DeclarationName Name = Method->getDeclName();
assert(Name.getNameKind() == DeclarationName::Identifier);
bool foundSameNameMethod = false;
SmallVector<CXXMethodDecl *, 8> overloadedMethods;
for (Path.Decls = BaseRecord->lookup(Name); !Path.Decls.empty();
Path.Decls = Path.Decls.slice(1)) {
NamedDecl *D = Path.Decls.front();
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
MD = MD->getCanonicalDecl();
foundSameNameMethod = true;
// Interested only in hidden virtual methods.
if (!MD->isVirtual())
continue;
// If the method we are checking overrides a method from its base
// don't warn about the other overloaded methods. Clang deviates from
// GCC by only diagnosing overloads of inherited virtual functions that
// do not override any other virtual functions in the base. GCC's
// -Woverloaded-virtual diagnoses any derived function hiding a virtual
// function from a base class. These cases may be better served by a
// warning (not specific to virtual functions) on call sites when the
// call would select a different function from the base class, were it
// visible.
// See FIXME in test/SemaCXX/warn-overload-virtual.cpp for an example.
if (!S->IsOverload(Method, MD, false))
return true;
// Collect the overload only if it's hidden.
if (!CheckMostOverridenMethods(MD, OverridenAndUsingBaseMethods))
overloadedMethods.push_back(MD);
}
}
if (foundSameNameMethod)
OverloadedMethods.append(overloadedMethods.begin(),
overloadedMethods.end());
return foundSameNameMethod;
}
};
} // end anonymous namespace
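// An illustrative case for the warning implemented below (not from the
// original source):
// struct Base { virtual void f(int); };
// struct Derived : Base {
// void f(double); // warning: 'Derived::f' hides overloaded virtual function
// };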
/// \brief Add the most overridden methods from MD to Methods.
static void AddMostOverridenMethods(const CXXMethodDecl *MD,
llvm::SmallPtrSetImpl<const CXXMethodDecl *>& Methods) {
if (MD->size_overridden_methods() == 0)
Methods.insert(MD->getCanonicalDecl());
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I)
AddMostOverridenMethods(*I, Methods);
}
/// \brief Check if a method overloads virtual methods in a base class without
/// overriding any.
void Sema::FindHiddenVirtualMethods(CXXMethodDecl *MD,
SmallVectorImpl<CXXMethodDecl*> &OverloadedMethods) {
if (!MD->getDeclName().isIdentifier())
return;
CXXBasePaths Paths(/*FindAmbiguities=*/true, // true to look in all bases.
/*bool RecordPaths=*/false,
/*bool DetectVirtual=*/false);
FindHiddenVirtualMethod FHVM;
FHVM.Method = MD;
FHVM.S = this;
// Keep the base methods that were overridden or introduced in the subclass
// by 'using' in a set. A base method not in this set is hidden.
CXXRecordDecl *DC = MD->getParent();
DeclContext::lookup_result R = DC->lookup(MD->getDeclName());
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
NamedDecl *ND = *I;
if (UsingShadowDecl *shad = dyn_cast<UsingShadowDecl>(*I))
ND = shad->getTargetDecl();
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))
AddMostOverridenMethods(MD, FHVM.OverridenAndUsingBaseMethods);
}
if (DC->lookupInBases(FHVM, Paths))
OverloadedMethods = FHVM.OverloadedMethods;
}
void Sema::NoteHiddenVirtualMethods(CXXMethodDecl *MD,
SmallVectorImpl<CXXMethodDecl*> &OverloadedMethods) {
for (unsigned i = 0, e = OverloadedMethods.size(); i != e; ++i) {
CXXMethodDecl *overloadedMD = OverloadedMethods[i];
PartialDiagnostic PD = PDiag(
diag::note_hidden_overloaded_virtual_declared_here) << overloadedMD;
HandleFunctionTypeMismatch(PD, MD->getType(), overloadedMD->getType());
Diag(overloadedMD->getLocation(), PD);
}
}
/// \brief Diagnose methods which overload virtual methods in a base class
/// without overriding any.
void Sema::DiagnoseHiddenVirtualMethods(CXXMethodDecl *MD) {
if (MD->isInvalidDecl())
return;
if (Diags.isIgnored(diag::warn_overloaded_virtual, MD->getLocation()))
return;
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
FindHiddenVirtualMethods(MD, OverloadedMethods);
if (!OverloadedMethods.empty()) {
Diag(MD->getLocation(), diag::warn_overloaded_virtual)
<< MD << (OverloadedMethods.size() > 1);
NoteHiddenVirtualMethods(MD, OverloadedMethods);
}
}
void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
Decl *TagDecl,
SourceLocation LBrac,
SourceLocation RBrac,
AttributeList *AttrList) {
if (!TagDecl)
return;
AdjustDeclIfTemplate(TagDecl);
for (const AttributeList* l = AttrList; l; l = l->getNext()) {
if (l->getKind() != AttributeList::AT_Visibility)
continue;
l->setInvalid();
Diag(l->getLoc(), diag::warn_attribute_after_definition_ignored) <<
l->getName();
}
ActOnFields(S, RLoc, TagDecl, llvm::makeArrayRef(
// strict aliasing violation!
reinterpret_cast<Decl**>(FieldCollector->getCurFields()),
FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList);
- CheckCompletedCXXClass(
- dyn_cast_or_null<CXXRecordDecl>(TagDecl));
+ CheckCompletedCXXClass(dyn_cast_or_null<CXXRecordDecl>(TagDecl));
}
/// AddImplicitlyDeclaredMembersToClass - Adds any implicitly-declared
/// special functions, such as the default constructor, copy
/// constructor, or destructor, to the given C++ class (C++
/// [special]p1). This routine can only be executed just before the
/// definition of the class is complete.
void Sema::AddImplicitlyDeclaredMembersToClass(CXXRecordDecl *ClassDecl) {
if (ClassDecl->needsImplicitDefaultConstructor()) {
++ASTContext::NumImplicitDefaultConstructors;
if (ClassDecl->hasInheritedConstructor())
DeclareImplicitDefaultConstructor(ClassDecl);
}
if (ClassDecl->needsImplicitCopyConstructor()) {
++ASTContext::NumImplicitCopyConstructors;
// If the properties or semantics of the copy constructor couldn't be
// determined while the class was being declared, force a declaration
// of it now.
if (ClassDecl->needsOverloadResolutionForCopyConstructor() ||
ClassDecl->hasInheritedConstructor())
DeclareImplicitCopyConstructor(ClassDecl);
// For the MS ABI we need to know whether the copy ctor is deleted. A
// prerequisite for deleting the implicit copy ctor is that the class has a
// move ctor or move assignment that is either user-declared or whose
// semantics are inherited from a subobject. FIXME: We should provide a more
// direct way for CodeGen to ask whether the constructor was deleted.
else if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
(ClassDecl->hasUserDeclaredMoveConstructor() ||
ClassDecl->needsOverloadResolutionForMoveConstructor() ||
ClassDecl->hasUserDeclaredMoveAssignment() ||
ClassDecl->needsOverloadResolutionForMoveAssignment()))
DeclareImplicitCopyConstructor(ClassDecl);
}
if (getLangOpts().CPlusPlus11 && ClassDecl->needsImplicitMoveConstructor()) {
++ASTContext::NumImplicitMoveConstructors;
if (ClassDecl->needsOverloadResolutionForMoveConstructor() ||
ClassDecl->hasInheritedConstructor())
DeclareImplicitMoveConstructor(ClassDecl);
}
if (ClassDecl->needsImplicitCopyAssignment()) {
++ASTContext::NumImplicitCopyAssignmentOperators;
// If we have a dynamic class, then the copy assignment operator may be
// virtual, so we have to declare it immediately. This ensures that, e.g.,
// it shows up in the right place in the vtable and that we diagnose
// problems with the implicit exception specification.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForCopyAssignment() ||
ClassDecl->hasInheritedAssignment())
DeclareImplicitCopyAssignment(ClassDecl);
}
if (getLangOpts().CPlusPlus11 && ClassDecl->needsImplicitMoveAssignment()) {
++ASTContext::NumImplicitMoveAssignmentOperators;
// Likewise for the move assignment operator.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForMoveAssignment() ||
ClassDecl->hasInheritedAssignment())
DeclareImplicitMoveAssignment(ClassDecl);
}
if (ClassDecl->needsImplicitDestructor()) {
++ASTContext::NumImplicitDestructors;
// If we have a dynamic class, then the destructor may be virtual, so we
// have to declare the destructor immediately. This ensures that, e.g., it
// shows up in the right place in the vtable and that we diagnose problems
// with the implicit exception specification.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForDestructor())
DeclareImplicitDestructor(ClassDecl);
}
}
unsigned Sema::ActOnReenterTemplateScope(Scope *S, Decl *D) {
if (!D)
return 0;
// The order of template parameters is not important here. All names
// get added to the same scope.
SmallVector<TemplateParameterList *, 4> ParameterLists;
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
D = TD->getTemplatedDecl();
if (auto *PSD = dyn_cast<ClassTemplatePartialSpecializationDecl>(D))
ParameterLists.push_back(PSD->getTemplateParameters());
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
for (unsigned i = 0; i < DD->getNumTemplateParameterLists(); ++i)
ParameterLists.push_back(DD->getTemplateParameterList(i));
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
ParameterLists.push_back(FTD->getTemplateParameters());
}
}
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
for (unsigned i = 0; i < TD->getNumTemplateParameterLists(); ++i)
ParameterLists.push_back(TD->getTemplateParameterList(i));
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(TD)) {
if (ClassTemplateDecl *CTD = RD->getDescribedClassTemplate())
ParameterLists.push_back(CTD->getTemplateParameters());
}
}
unsigned Count = 0;
for (TemplateParameterList *Params : ParameterLists) {
if (Params->size() > 0)
// Ignore explicit specializations; they don't contribute to the template
// depth.
++Count;
for (NamedDecl *Param : *Params) {
if (Param->getDeclName()) {
S->AddDecl(Param);
IdResolver.AddDecl(Param);
}
}
}
return Count;
}
void Sema::ActOnStartDelayedMemberDeclarations(Scope *S, Decl *RecordD) {
if (!RecordD) return;
AdjustDeclIfTemplate(RecordD);
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordD);
PushDeclContext(S, Record);
}
void Sema::ActOnFinishDelayedMemberDeclarations(Scope *S, Decl *RecordD) {
if (!RecordD) return;
PopDeclContext();
}
/// This is used to implement the constant expression evaluation part of the
/// attribute enable_if extension. There is nothing in standard C++ which would
/// require reentering parameters.
void Sema::ActOnReenterCXXMethodParameter(Scope *S, ParmVarDecl *Param) {
if (!Param)
return;
S->AddDecl(Param);
if (Param->getDeclName())
IdResolver.AddDecl(Param);
}
/// ActOnStartDelayedCXXMethodDeclaration - We have completed
/// parsing a top-level (non-nested) C++ class, and we are now
/// parsing those parts of the given Method declaration that could
/// not be parsed earlier (C++ [class.mem]p2), such as default
/// arguments. This action should enter the scope of the given
/// Method declaration as if we had just parsed the qualified method
/// name. However, it should not bring the parameters into scope;
/// that will be performed by ActOnDelayedCXXMethodParameter.
void Sema::ActOnStartDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
}
/// ActOnDelayedCXXMethodParameter - We've already started a delayed
/// C++ method declaration. We're (re-)introducing the given
/// function parameter into scope for use in parsing later parts of
/// the method declaration. For example, we could see an
/// ActOnParamDefaultArgument event for this parameter.
void Sema::ActOnDelayedCXXMethodParameter(Scope *S, Decl *ParamD) {
if (!ParamD)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(ParamD);
// If this parameter has an unparsed default argument, clear it out
// to make way for the parsed default argument.
if (Param->hasUnparsedDefaultArg())
Param->setDefaultArg(nullptr);
S->AddDecl(Param);
if (Param->getDeclName())
IdResolver.AddDecl(Param);
}
/// ActOnFinishDelayedCXXMethodDeclaration - We have finished
/// processing the delayed method declaration for Method. The method
/// declaration is now considered finished. There may be a separate
/// ActOnStartOfFunctionDef action later (not necessarily
/// immediately!) for this method, if it was also defined inside the
/// class body.
void Sema::ActOnFinishDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
if (!MethodD)
return;
AdjustDeclIfTemplate(MethodD);
FunctionDecl *Method = cast<FunctionDecl>(MethodD);
// Now that we have our default arguments, check the constructor
// again. It could produce additional diagnostics or affect whether
// the class has implicitly-declared destructors, among other
// things.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Method))
CheckConstructor(Constructor);
// Check the default arguments, which we may have added.
if (!Method->isInvalidDecl())
CheckCXXDefaultArguments(Method);
}
/// CheckConstructorDeclarator - Called by ActOnDeclarator to check
/// the well-formedness of the constructor declarator @p D with type @p
/// R. If there are any errors in the declarator, this routine will
/// emit diagnostics and set the invalid bit to true. In any case, the type
/// will be updated to reflect a well-formed type for the constructor and
/// returned.
QualType Sema::CheckConstructorDeclarator(Declarator &D, QualType R,
StorageClass &SC) {
bool isVirtual = D.getDeclSpec().isVirtualSpecified();
// C++ [class.ctor]p3:
// A constructor shall not be virtual (10.3) or static (9.4). A
// constructor can be invoked for a const, volatile or const
// volatile object. A constructor shall not be declared const,
// volatile, or const volatile (9.3.2).
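// For example (illustrative, not from the original source), each of these is
// rejected below:
// struct C1 { virtual C1(); }; // error: constructor cannot be virtual
// struct C2 { static C2(); }; // error: constructor cannot be static
// struct C3 { C3() const; }; // error: constructor cannot be const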
if (isVirtual) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_constructor_cannot_be)
<< "virtual" << SourceRange(D.getDeclSpec().getVirtualSpecLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_constructor_cannot_be)
<< "static" << SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
SC = SC_None;
}
if (unsigned TypeQuals = D.getDeclSpec().getTypeQualifiers()) {
diagnoseIgnoredQualifiers(
diag::err_constructor_return_type, TypeQuals, SourceLocation(),
D.getDeclSpec().getConstSpecLoc(), D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc());
D.setInvalidType();
}
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
if (FTI.TypeQuals != 0) {
if (FTI.TypeQuals & Qualifiers::Const)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "const" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Volatile)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "volatile" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Restrict)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "restrict" << SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
// C++0x [class.ctor]p4:
// A constructor shall not be declared with a ref-qualifier.
if (FTI.hasRefQualifier()) {
Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_constructor)
<< FTI.RefQualifierIsLValueRef
<< FixItHint::CreateRemoval(FTI.getRefQualifierLoc());
D.setInvalidType();
}
// Rebuild the function type "R" without any type qualifiers (in
// case any of the errors above fired) and with "void" as the
// return type, since constructors don't have return types.
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
if (Proto->getReturnType() == Context.VoidTy && !D.isInvalidType())
return R;
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
EPI.TypeQuals = 0;
EPI.RefQualifier = RQ_None;
return Context.getFunctionType(Context.VoidTy, Proto->getParamTypes(), EPI);
}
/// CheckConstructor - Checks a fully-formed constructor for
/// well-formedness, issuing any diagnostics required. Marks the
/// constructor invalid if a problem is found.
void Sema::CheckConstructor(CXXConstructorDecl *Constructor) {
CXXRecordDecl *ClassDecl
= dyn_cast<CXXRecordDecl>(Constructor->getDeclContext());
if (!ClassDecl)
return Constructor->setInvalidDecl();
// C++ [class.copy]p3:
// A declaration of a constructor for a class X is ill-formed if
// its first parameter is of type (optionally cv-qualified) X and
// either there are no other parameters or else all other
// parameters have default arguments.
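// e.g. (illustrative, not from the original source):
// struct X { X(X); }; // error: must pass its first argument by reference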
if (!Constructor->isInvalidDecl() &&
((Constructor->getNumParams() == 1) ||
(Constructor->getNumParams() > 1 &&
Constructor->getParamDecl(1)->hasDefaultArg())) &&
Constructor->getTemplateSpecializationKind()
!= TSK_ImplicitInstantiation) {
QualType ParamType = Constructor->getParamDecl(0)->getType();
QualType ClassTy = Context.getTagDeclType(ClassDecl);
if (Context.getCanonicalType(ParamType).getUnqualifiedType() == ClassTy) {
SourceLocation ParamLoc = Constructor->getParamDecl(0)->getLocation();
const char *ConstRef
= Constructor->getParamDecl(0)->getIdentifier() ? "const &"
: " const &";
Diag(ParamLoc, diag::err_constructor_byvalue_arg)
<< FixItHint::CreateInsertion(ParamLoc, ConstRef);
// FIXME: Rather than making the constructor invalid, we should endeavor
// to fix the type.
Constructor->setInvalidDecl();
}
}
}
/// CheckDestructor - Checks a fully-formed destructor definition for
/// well-formedness, issuing any diagnostics required. Returns true
/// on error.
bool Sema::CheckDestructor(CXXDestructorDecl *Destructor) {
CXXRecordDecl *RD = Destructor->getParent();
if (!Destructor->getOperatorDelete() && Destructor->isVirtual()) {
SourceLocation Loc;
if (!Destructor->isImplicit())
Loc = Destructor->getLocation();
else
Loc = RD->getLocation();
// If we have a virtual destructor, look up the deallocation function
if (FunctionDecl *OperatorDelete =
FindDeallocationFunctionForDestructor(Loc, RD)) {
MarkFunctionReferenced(Loc, OperatorDelete);
Destructor->setOperatorDelete(OperatorDelete);
}
}
return false;
}
/// CheckDestructorDeclarator - Called by ActOnDeclarator to check
/// the well-formedness of the destructor declarator @p D with type @p
/// R. If there are any errors in the declarator, this routine will
/// emit diagnostics and set the declarator to invalid. Even if this happens,
/// the type will be updated to reflect a well-formed type for the destructor
/// and returned.
QualType Sema::CheckDestructorDeclarator(Declarator &D, QualType R,
StorageClass& SC) {
// C++ [class.dtor]p1:
// [...] A typedef-name that names a class is a class-name
// (7.1.3); however, a typedef-name that names a class shall not
// be used as the identifier in the declarator for a destructor
// declaration.
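// For example (illustrative, not from the original source):
// struct S { ~S(); };
// typedef S Alias;
// Alias::~Alias() { } // error: typedef-name used as destructor name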
QualType DeclaratorType = GetTypeFromParser(D.getName().DestructorName);
if (const TypedefType *TT = DeclaratorType->getAs<TypedefType>())
Diag(D.getIdentifierLoc(), diag::err_destructor_typedef_name)
<< DeclaratorType << isa<TypeAliasDecl>(TT->getDecl());
else if (const TemplateSpecializationType *TST =
DeclaratorType->getAs<TemplateSpecializationType>())
if (TST->isTypeAlias())
Diag(D.getIdentifierLoc(), diag::err_destructor_typedef_name)
<< DeclaratorType << 1;
// C++ [class.dtor]p2:
// A destructor is used to destroy objects of its class type. A
// destructor takes no parameters, and no return type can be
// specified for it (not even void). The address of a destructor
// shall not be taken. A destructor shall not be static. A
// destructor can be invoked for a const, volatile or const
// volatile object. A destructor shall not be declared const,
// volatile or const volatile (9.3.2).
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_destructor_cannot_be)
<< "static" << SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< SourceRange(D.getIdentifierLoc())
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
SC = SC_None;
}
if (!D.isInvalidType()) {
// Destructors don't have return types, but the parser will
// happily parse something like:
//
// class X {
// float ~X();
// };
//
// The return type will be eliminated later.
if (D.getDeclSpec().hasTypeSpecifier())
Diag(D.getIdentifierLoc(), diag::err_destructor_return_type)
<< SourceRange(D.getDeclSpec().getTypeSpecTypeLoc())
<< SourceRange(D.getIdentifierLoc());
else if (unsigned TypeQuals = D.getDeclSpec().getTypeQualifiers()) {
diagnoseIgnoredQualifiers(diag::err_destructor_return_type, TypeQuals,
SourceLocation(),
D.getDeclSpec().getConstSpecLoc(),
D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc());
D.setInvalidType();
}
}
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
if (FTI.TypeQuals != 0 && !D.isInvalidType()) {
if (FTI.TypeQuals & Qualifiers::Const)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "const" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Volatile)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "volatile" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Restrict)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "restrict" << SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
// C++0x [class.dtor]p2:
// A destructor shall not be declared with a ref-qualifier.
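// For illustration: 'struct X { ~X() &&; };' is rejected here, with a
// fix-it removing the ref-qualifier.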
if (FTI.hasRefQualifier()) {
Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_destructor)
<< FTI.RefQualifierIsLValueRef
<< FixItHint::CreateRemoval(FTI.getRefQualifierLoc());
D.setInvalidType();
}
// Make sure we don't have any parameters.
if (FTIHasNonVoidParameters(FTI)) {
Diag(D.getIdentifierLoc(), diag::err_destructor_with_params);
// Delete the parameters.
FTI.freeParams();
D.setInvalidType();
}
// Make sure the destructor isn't variadic.
if (FTI.isVariadic) {
Diag(D.getIdentifierLoc(), diag::err_destructor_variadic);
D.setInvalidType();
}
// Rebuild the function type "R" without any type qualifiers or
// parameters (in case any of the errors above fired) and with
// "void" as the return type, since destructors don't have return
// types.
if (!D.isInvalidType())
return R;
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
EPI.Variadic = false;
EPI.TypeQuals = 0;
EPI.RefQualifier = RQ_None;
return Context.getFunctionType(Context.VoidTy, None, EPI);
}
static void extendLeft(SourceRange &R, SourceRange Before) {
if (Before.isInvalid())
return;
R.setBegin(Before.getBegin());
if (R.getEnd().isInvalid())
R.setEnd(Before.getEnd());
}
static void extendRight(SourceRange &R, SourceRange After) {
if (After.isInvalid())
return;
if (R.getBegin().isInvalid())
R.setBegin(After.getBegin());
R.setEnd(After.getEnd());
}
/// CheckConversionDeclarator - Called by ActOnDeclarator to check the
/// well-formedness of the conversion function declarator @p D with
/// type @p R. If there are any errors in the declarator, this routine
/// will emit diagnostics and mark the declarator invalid. Either way,
/// the type @p R will be updated to reflect a well-formed type for the
/// conversion operator.
void Sema::CheckConversionDeclarator(Declarator &D, QualType &R,
StorageClass& SC) {
// C++ [class.conv.fct]p1:
// Neither parameter types nor return type can be specified. The
// type of a conversion function (8.3.5) is "function taking no
// parameter returning conversion-type-id."
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_conv_function_not_member)
<< SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< D.getName().getSourceRange();
D.setInvalidType();
SC = SC_None;
}
TypeSourceInfo *ConvTSI = nullptr;
QualType ConvType =
GetTypeFromParser(D.getName().ConversionFunctionId, &ConvTSI);
if (D.getDeclSpec().hasTypeSpecifier() && !D.isInvalidType()) {
// Conversion functions don't have return types, but the parser will
// happily parse something like:
//
// class X {
// float operator bool();
// };
//
// The return type will be changed later anyway.
Diag(D.getIdentifierLoc(), diag::err_conv_function_return_type)
<< SourceRange(D.getDeclSpec().getTypeSpecTypeLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
// Make sure we don't have any parameters.
if (Proto->getNumParams() > 0) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_with_params);
// Delete the parameters.
D.getFunctionTypeInfo().freeParams();
D.setInvalidType();
} else if (Proto->isVariadic()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_variadic);
D.setInvalidType();
}
// Diagnose "&operator bool()" and other such nonsense. This
// is actually a gcc extension which we don't support.
if (Proto->getReturnType() != ConvType) {
bool NeedsTypedef = false;
SourceRange Before, After;
// Walk the chunks and extract information on them for our diagnostic.
bool PastFunctionChunk = false;
for (auto &Chunk : D.type_objects()) {
switch (Chunk.Kind) {
case DeclaratorChunk::Function:
if (!PastFunctionChunk) {
if (Chunk.Fun.HasTrailingReturnType) {
TypeSourceInfo *TRT = nullptr;
GetTypeFromParser(Chunk.Fun.getTrailingReturnType(), &TRT);
if (TRT) extendRight(After, TRT->getTypeLoc().getSourceRange());
}
PastFunctionChunk = true;
break;
}
// Fall through.
case DeclaratorChunk::Array:
NeedsTypedef = true;
extendRight(After, Chunk.getSourceRange());
break;
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
extendLeft(Before, Chunk.getSourceRange());
break;
case DeclaratorChunk::Paren:
extendLeft(Before, Chunk.Loc);
extendRight(After, Chunk.EndLoc);
break;
}
}
SourceLocation Loc = Before.isValid() ? Before.getBegin() :
After.isValid() ? After.getBegin() :
D.getIdentifierLoc();
auto &&DB = Diag(Loc, diag::err_conv_function_with_complex_decl);
DB << Before << After;
if (!NeedsTypedef) {
DB << /*don't need a typedef*/0;
// If we can provide a correct fix-it hint, do so.
if (After.isInvalid() && ConvTSI) {
SourceLocation InsertLoc =
getLocForEndOfToken(ConvTSI->getTypeLoc().getLocEnd());
DB << FixItHint::CreateInsertion(InsertLoc, " ")
<< FixItHint::CreateInsertionFromRange(
InsertLoc, CharSourceRange::getTokenRange(Before))
<< FixItHint::CreateRemoval(Before);
}
} else if (!Proto->getReturnType()->isDependentType()) {
DB << /*typedef*/1 << Proto->getReturnType();
} else if (getLangOpts().CPlusPlus11) {
DB << /*alias template*/2 << Proto->getReturnType();
} else {
DB << /*might not be fixable*/3;
}
// Recover by incorporating the other type chunks into the result type.
// Note, this does *not* change the name of the function. This is compatible
// with the GCC extension:
// struct S { &operator int(); } s;
// int &r = s.operator int(); // ok in GCC
// S::operator int&() {} // error in GCC, function name is 'operator int'.
ConvType = Proto->getReturnType();
}
// C++ [class.conv.fct]p4:
// The conversion-type-id shall not represent a function type nor
// an array type.
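// For illustration:
//   typedef int Arr[4];
//   typedef void Fn();
//   struct X {
//     operator Arr();  // error: conversion function cannot convert to an array type
//     operator Fn();   // error: conversion function cannot convert to a function type
//   };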
if (ConvType->isArrayType()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_to_array);
ConvType = Context.getPointerType(ConvType);
D.setInvalidType();
} else if (ConvType->isFunctionType()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_to_function);
ConvType = Context.getPointerType(ConvType);
D.setInvalidType();
}
// Rebuild the function type "R" without any parameters (in case any
// of the errors above fired) and with the conversion type as the
// return type.
if (D.isInvalidType())
R = Context.getFunctionType(ConvType, None, Proto->getExtProtoInfo());
// C++0x explicit conversion operators.
if (D.getDeclSpec().isExplicitSpecified())
Diag(D.getDeclSpec().getExplicitSpecLoc(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_explicit_conversion_functions :
diag::ext_explicit_conversion_functions)
<< SourceRange(D.getDeclSpec().getExplicitSpecLoc());
}
/// ActOnConversionDeclarator - Called by ActOnDeclarator to complete
/// the declaration of the given C++ conversion function. This routine
/// is responsible for recording the conversion function in the C++
/// class, if possible.
Decl *Sema::ActOnConversionDeclarator(CXXConversionDecl *Conversion) {
assert(Conversion && "Expected to receive a conversion function declaration");
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(Conversion->getDeclContext());
// Make sure we aren't redeclaring the conversion function.
QualType ConvType = Context.getCanonicalType(Conversion->getConversionType());
// C++ [class.conv.fct]p1:
// [...] A conversion function is never used to convert a
// (possibly cv-qualified) object to the (possibly cv-qualified)
// same object type (or a reference to it), to a (possibly
// cv-qualified) base class of that type (or a reference to it),
// or to (possibly cv-qualified) void.
// FIXME: Suppress this warning if the conversion function ends up being a
// virtual function that overrides a virtual function in a base class.
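// For illustration, conversions that draw these warnings:
//   struct B { };
//   struct D : B {
//     operator D();     // converts to its own type: never used
//     operator B&();    // converts to a base class: never used
//     operator void();  // converts to void: never used
//   };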
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
if (const ReferenceType *ConvTypeRef = ConvType->getAs<ReferenceType>())
ConvType = ConvTypeRef->getPointeeType();
if (Conversion->getTemplateSpecializationKind() != TSK_Undeclared &&
Conversion->getTemplateSpecializationKind() != TSK_ExplicitSpecialization)
/* Suppress diagnostics for instantiations. */;
else if (ConvType->isRecordType()) {
ConvType = Context.getCanonicalType(ConvType).getUnqualifiedType();
if (ConvType == ClassType)
Diag(Conversion->getLocation(), diag::warn_conv_to_self_not_used)
<< ClassType;
else if (IsDerivedFrom(Conversion->getLocation(), ClassType, ConvType))
Diag(Conversion->getLocation(), diag::warn_conv_to_base_not_used)
<< ClassType << ConvType;
} else if (ConvType->isVoidType()) {
Diag(Conversion->getLocation(), diag::warn_conv_to_void_not_used)
<< ClassType << ConvType;
}
if (FunctionTemplateDecl *ConversionTemplate
= Conversion->getDescribedFunctionTemplate())
return ConversionTemplate;
return Conversion;
}
namespace {
/// Utility class to accumulate and print a diagnostic listing the invalid
/// specifier(s) on a declaration.
struct BadSpecifierDiagnoser {
BadSpecifierDiagnoser(Sema &S, SourceLocation Loc, unsigned DiagID)
: S(S), Diagnostic(S.Diag(Loc, DiagID)) {}
~BadSpecifierDiagnoser() {
Diagnostic << Specifiers;
}
template<typename T> void check(SourceLocation SpecLoc, T Spec) {
return check(SpecLoc, DeclSpec::getSpecifierName(Spec));
}
void check(SourceLocation SpecLoc, DeclSpec::TST Spec) {
return check(SpecLoc,
DeclSpec::getSpecifierName(Spec, S.getPrintingPolicy()));
}
void check(SourceLocation SpecLoc, const char *Spec) {
if (SpecLoc.isInvalid()) return;
Diagnostic << SourceRange(SpecLoc, SpecLoc);
if (!Specifiers.empty()) Specifiers += " ";
Specifiers += Spec;
}
Sema &S;
Sema::SemaDiagnosticBuilder Diagnostic;
std::string Specifiers;
};
}
/// Check the validity of a declarator that we parsed for a deduction-guide.
/// These aren't actually declarators in the grammar, so we need to check that
/// the user didn't specify any pieces that are not part of the deduction-guide
/// grammar.
void Sema::CheckDeductionGuideDeclarator(Declarator &D, QualType &R,
StorageClass &SC) {
TemplateName GuidedTemplate = D.getName().TemplateName.get().get();
TemplateDecl *GuidedTemplateDecl = GuidedTemplate.getAsTemplateDecl();
assert(GuidedTemplateDecl && "missing template decl for deduction guide");
// C++ [temp.deduct.guide]p3:
// A deduction-guide shall be declared in the same scope as the
// corresponding class template.
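// For illustration:
//   template<typename T> struct S { S(T); };
//   S(int) -> S<int>;                    // OK: same scope as S
//   namespace N { S(char) -> S<char>; }  // diagnosed: wrong scope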
if (!CurContext->getRedeclContext()->Equals(
GuidedTemplateDecl->getDeclContext()->getRedeclContext())) {
Diag(D.getIdentifierLoc(), diag::err_deduction_guide_wrong_scope)
<< GuidedTemplateDecl;
Diag(GuidedTemplateDecl->getLocation(), diag::note_template_decl_here);
}
auto &DS = D.getMutableDeclSpec();
// We leave 'friend' and 'virtual' to be rejected in the normal way.
if (DS.hasTypeSpecifier() || DS.getTypeQualifiers() ||
DS.getStorageClassSpecLoc().isValid() || DS.isInlineSpecified() ||
DS.isNoreturnSpecified() || DS.isConstexprSpecified() ||
DS.isConceptSpecified()) {
BadSpecifierDiagnoser Diagnoser(
*this, D.getIdentifierLoc(),
diag::err_deduction_guide_invalid_specifier);
Diagnoser.check(DS.getStorageClassSpecLoc(), DS.getStorageClassSpec());
DS.ClearStorageClassSpecs();
SC = SC_None;
// 'explicit' is permitted.
Diagnoser.check(DS.getInlineSpecLoc(), "inline");
Diagnoser.check(DS.getNoreturnSpecLoc(), "_Noreturn");
Diagnoser.check(DS.getConstexprSpecLoc(), "constexpr");
Diagnoser.check(DS.getConceptSpecLoc(), "concept");
DS.ClearConstexprSpec();
DS.ClearConceptSpec();
Diagnoser.check(DS.getConstSpecLoc(), "const");
Diagnoser.check(DS.getRestrictSpecLoc(), "__restrict");
Diagnoser.check(DS.getVolatileSpecLoc(), "volatile");
Diagnoser.check(DS.getAtomicSpecLoc(), "_Atomic");
Diagnoser.check(DS.getUnalignedSpecLoc(), "__unaligned");
DS.ClearTypeQualifiers();
Diagnoser.check(DS.getTypeSpecComplexLoc(), DS.getTypeSpecComplex());
Diagnoser.check(DS.getTypeSpecSignLoc(), DS.getTypeSpecSign());
Diagnoser.check(DS.getTypeSpecWidthLoc(), DS.getTypeSpecWidth());
Diagnoser.check(DS.getTypeSpecTypeLoc(), DS.getTypeSpecType());
DS.ClearTypeSpecType();
}
if (D.isInvalidType())
return;
// Check the declarator is simple enough.
bool FoundFunction = false;
for (const DeclaratorChunk &Chunk : llvm::reverse(D.type_objects())) {
if (Chunk.Kind == DeclaratorChunk::Paren)
continue;
if (Chunk.Kind != DeclaratorChunk::Function || FoundFunction) {
Diag(D.getDeclSpec().getLocStart(),
diag::err_deduction_guide_with_complex_decl)
<< D.getSourceRange();
break;
}
if (!Chunk.Fun.hasTrailingReturnType()) {
Diag(D.getName().getLocStart(),
diag::err_deduction_guide_no_trailing_return_type);
break;
}
// Check that the return type is written as a specialization of
// the template specified as the deduction-guide's name.
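// For illustration: given 'template<typename T> struct S;', the guide
// 'S(int) -> S<int>;' is fine, while 'S(int) -> int;' is diagnosed below.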
ParsedType TrailingReturnType = Chunk.Fun.getTrailingReturnType();
TypeSourceInfo *TSI = nullptr;
QualType RetTy = GetTypeFromParser(TrailingReturnType, &TSI);
assert(TSI && "deduction guide has valid type but invalid return type?");
bool AcceptableReturnType = false;
bool MightInstantiateToSpecialization = false;
if (auto RetTST =
TSI->getTypeLoc().getAs<TemplateSpecializationTypeLoc>()) {
TemplateName SpecifiedName = RetTST.getTypePtr()->getTemplateName();
bool TemplateMatches =
Context.hasSameTemplateName(SpecifiedName, GuidedTemplate);
if (SpecifiedName.getKind() == TemplateName::Template && TemplateMatches)
AcceptableReturnType = true;
else {
// This could still instantiate to the right type, unless we know it
// names the wrong class template.
auto *TD = SpecifiedName.getAsTemplateDecl();
MightInstantiateToSpecialization = !(TD && isa<ClassTemplateDecl>(TD) &&
!TemplateMatches);
}
} else if (!RetTy.hasQualifiers() && RetTy->isDependentType()) {
MightInstantiateToSpecialization = true;
}
if (!AcceptableReturnType) {
Diag(TSI->getTypeLoc().getLocStart(),
diag::err_deduction_guide_bad_trailing_return_type)
<< GuidedTemplate << TSI->getType() << MightInstantiateToSpecialization
<< TSI->getTypeLoc().getSourceRange();
}
// Keep going to check that we don't have any inner declarator pieces (we
// could still have a function returning a pointer to a function).
FoundFunction = true;
}
if (D.isFunctionDefinition())
Diag(D.getIdentifierLoc(), diag::err_deduction_guide_defines_function);
}
//===----------------------------------------------------------------------===//
// Namespace Handling
//===----------------------------------------------------------------------===//
/// \brief Diagnose a mismatch in 'inline' qualifiers when a namespace is
/// reopened.
static void DiagnoseNamespaceInlineMismatch(Sema &S, SourceLocation KeywordLoc,
SourceLocation Loc,
IdentifierInfo *II, bool *IsInline,
NamespaceDecl *PrevNS) {
assert(*IsInline != PrevNS->isInline());
// HACK: Work around a bug in libstdc++4.6's <atomic>, where
// std::__atomic[0,1,2] are defined as non-inline namespaces, then reopened as
// inline namespaces, with the intention of bringing names into namespace std.
//
// We support this just well enough to get that case working; this is not
// sufficient to support reopening namespaces as inline in general.
if (*IsInline && II && II->getName().startswith("__atomic") &&
S.getSourceManager().isInSystemHeader(Loc)) {
// Mark all prior declarations of the namespace as inline.
for (NamespaceDecl *NS = PrevNS->getMostRecentDecl(); NS;
NS = NS->getPreviousDecl())
NS->setInline(*IsInline);
// Patch up the lookup table for the containing namespace. This isn't really
// correct, but it's good enough for this particular case.
for (auto *I : PrevNS->decls())
if (auto *ND = dyn_cast<NamedDecl>(I))
PrevNS->getParent()->makeDeclVisibleInContext(ND);
return;
}
if (PrevNS->isInline())
// The user probably just forgot the 'inline', so suggest that it
// be added back.
S.Diag(Loc, diag::warn_inline_namespace_reopened_noninline)
<< FixItHint::CreateInsertion(KeywordLoc, "inline ");
else
S.Diag(Loc, diag::err_inline_namespace_mismatch);
S.Diag(PrevNS->getLocation(), diag::note_previous_definition);
*IsInline = PrevNS->isInline();
}
/// ActOnStartNamespaceDef - This is called at the start of a namespace
/// definition.
Decl *Sema::ActOnStartNamespaceDef(Scope *NamespcScope,
SourceLocation InlineLoc,
SourceLocation NamespaceLoc,
SourceLocation IdentLoc,
IdentifierInfo *II,
SourceLocation LBrace,
AttributeList *AttrList,
UsingDirectiveDecl *&UD) {
SourceLocation StartLoc = InlineLoc.isValid() ? InlineLoc : NamespaceLoc;
// For an anonymous namespace, take the location of the left brace.
SourceLocation Loc = II ? IdentLoc : LBrace;
bool IsInline = InlineLoc.isValid();
bool IsInvalid = false;
bool IsStd = false;
bool AddToKnown = false;
Scope *DeclRegionScope = NamespcScope->getParent();
NamespaceDecl *PrevNS = nullptr;
if (II) {
// C++ [namespace.def]p2:
// The identifier in an original-namespace-definition shall not
// have been previously defined in the declarative region in
// which the original-namespace-definition appears. The
// identifier in an original-namespace-definition is the name of
// the namespace. Subsequently in that declarative region, it is
// treated as an original-namespace-name.
//
// Since namespace names are unique in their scope, and we don't
// look through using directives, just look for any ordinary names
// as if by qualified name lookup.
LookupResult R(*this, II, IdentLoc, LookupOrdinaryName, ForRedeclaration);
LookupQualifiedName(R, CurContext->getRedeclContext());
NamedDecl *PrevDecl =
R.isSingleResult() ? R.getRepresentativeDecl() : nullptr;
PrevNS = dyn_cast_or_null<NamespaceDecl>(PrevDecl);
if (PrevNS) {
// This is an extended namespace definition.
if (IsInline != PrevNS->isInline())
DiagnoseNamespaceInlineMismatch(*this, NamespaceLoc, Loc, II,
&IsInline, PrevNS);
} else if (PrevDecl) {
// This is an invalid name redefinition.
Diag(Loc, diag::err_redefinition_different_kind)
<< II;
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
IsInvalid = true;
// Continue on to push Namespc as current DeclContext and return it.
} else if (II->isStr("std") &&
CurContext->getRedeclContext()->isTranslationUnit()) {
// This is the first "real" definition of the namespace "std", so update
// our cache of the "std" namespace to point at this definition.
PrevNS = getStdNamespace();
IsStd = true;
AddToKnown = !IsInline;
} else {
// We've seen this namespace for the first time.
AddToKnown = !IsInline;
}
} else {
// Anonymous namespaces.
// Determine whether the parent already has an anonymous namespace.
DeclContext *Parent = CurContext->getRedeclContext();
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(Parent)) {
PrevNS = TU->getAnonymousNamespace();
} else {
NamespaceDecl *ND = cast<NamespaceDecl>(Parent);
PrevNS = ND->getAnonymousNamespace();
}
if (PrevNS && IsInline != PrevNS->isInline())
DiagnoseNamespaceInlineMismatch(*this, NamespaceLoc, NamespaceLoc, II,
&IsInline, PrevNS);
}
NamespaceDecl *Namespc = NamespaceDecl::Create(Context, CurContext, IsInline,
StartLoc, Loc, II, PrevNS);
if (IsInvalid)
Namespc->setInvalidDecl();
ProcessDeclAttributeList(DeclRegionScope, Namespc, AttrList);
AddPragmaAttributes(DeclRegionScope, Namespc);
// FIXME: Should we be merging attributes?
if (const VisibilityAttr *Attr = Namespc->getAttr<VisibilityAttr>())
PushNamespaceVisibilityAttr(Attr, Loc);
if (IsStd)
StdNamespace = Namespc;
if (AddToKnown)
KnownNamespaces[Namespc] = false;
if (II) {
PushOnScopeChains(Namespc, DeclRegionScope);
} else {
// Link the anonymous namespace into its parent.
DeclContext *Parent = CurContext->getRedeclContext();
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(Parent)) {
TU->setAnonymousNamespace(Namespc);
} else {
cast<NamespaceDecl>(Parent)->setAnonymousNamespace(Namespc);
}
CurContext->addDecl(Namespc);
// C++ [namespace.unnamed]p1. An unnamed-namespace-definition
// behaves as if it were replaced by
// namespace unique { /* empty body */ }
// using namespace unique;
// namespace unique { namespace-body }
// where all occurrences of 'unique' in a translation unit are
// replaced by the same identifier and this identifier differs
// from all other identifiers in the entire program.
// We just create the namespace with an empty name and then add an
// implicit using declaration, just like the standard suggests.
//
// CodeGen enforces the "universally unique" aspect by giving all
// declarations semantically contained within an anonymous
// namespace internal linkage.
if (!PrevNS) {
UD = UsingDirectiveDecl::Create(Context, Parent,
/* 'using' */ LBrace,
/* 'namespace' */ SourceLocation(),
/* qualifier */ NestedNameSpecifierLoc(),
/* identifier */ SourceLocation(),
Namespc,
/* Ancestor */ Parent);
UD->setImplicit();
Parent->addDecl(UD);
}
}
ActOnDocumentableDecl(Namespc);
// Although we could have an invalid decl (i.e. the namespace name is a
// redefinition), push it as current DeclContext and try to continue parsing.
// FIXME: We should be able to push Namespc here, so that each DeclContext
// for the namespace has the declarations that showed up in that particular
// namespace definition.
PushDeclContext(NamespcScope, Namespc);
return Namespc;
}
/// getNamespaceDecl - Returns the namespace a decl represents. If the decl
/// is a namespace alias, returns the namespace it points to.
static inline NamespaceDecl *getNamespaceDecl(NamedDecl *D) {
if (NamespaceAliasDecl *AD = dyn_cast_or_null<NamespaceAliasDecl>(D))
return AD->getNamespace();
return dyn_cast_or_null<NamespaceDecl>(D);
}
/// ActOnFinishNamespaceDef - This callback is called after a namespace is
/// exited. Decl is the DeclTy returned by ActOnStartNamespaceDef.
void Sema::ActOnFinishNamespaceDef(Decl *Dcl, SourceLocation RBrace) {
NamespaceDecl *Namespc = dyn_cast_or_null<NamespaceDecl>(Dcl);
assert(Namespc && "Invalid parameter, expected NamespaceDecl");
Namespc->setRBraceLoc(RBrace);
PopDeclContext();
if (Namespc->hasAttr<VisibilityAttr>())
PopPragmaVisibility(true, RBrace);
}
CXXRecordDecl *Sema::getStdBadAlloc() const {
return cast_or_null<CXXRecordDecl>(
StdBadAlloc.get(Context.getExternalSource()));
}
EnumDecl *Sema::getStdAlignValT() const {
return cast_or_null<EnumDecl>(StdAlignValT.get(Context.getExternalSource()));
}
NamespaceDecl *Sema::getStdNamespace() const {
return cast_or_null<NamespaceDecl>(
StdNamespace.get(Context.getExternalSource()));
}
NamespaceDecl *Sema::lookupStdExperimentalNamespace() {
if (!StdExperimentalNamespaceCache) {
if (auto Std = getStdNamespace()) {
LookupResult Result(*this, &PP.getIdentifierTable().get("experimental"),
SourceLocation(), LookupNamespaceName);
if (!LookupQualifiedName(Result, Std) ||
!(StdExperimentalNamespaceCache =
Result.getAsSingle<NamespaceDecl>()))
Result.suppressDiagnostics();
}
}
return StdExperimentalNamespaceCache;
}
/// \brief Retrieve the special "std" namespace, which may require us to
/// implicitly define the namespace.
NamespaceDecl *Sema::getOrCreateStdNamespace() {
if (!StdNamespace) {
// The "std" namespace has not yet been defined, so build one implicitly.
StdNamespace = NamespaceDecl::Create(Context,
Context.getTranslationUnitDecl(),
/*Inline=*/false,
SourceLocation(), SourceLocation(),
&PP.getIdentifierTable().get("std"),
/*PrevDecl=*/nullptr);
getStdNamespace()->setImplicit(true);
}
return getStdNamespace();
}
bool Sema::isStdInitializerList(QualType Ty, QualType *Element) {
assert(getLangOpts().CPlusPlus &&
"Looking for std::initializer_list outside of C++.");
// We're looking for implicit instantiations of
// template <typename E> class std::initializer_list.
if (!StdNamespace) // If we haven't seen namespace std yet, this can't be it.
return false;
ClassTemplateDecl *Template = nullptr;
const TemplateArgument *Arguments = nullptr;
if (const RecordType *RT = Ty->getAs<RecordType>()) {
ClassTemplateSpecializationDecl *Specialization =
dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
if (!Specialization)
return false;
Template = Specialization->getSpecializedTemplate();
Arguments = Specialization->getTemplateArgs().data();
} else if (const TemplateSpecializationType *TST =
Ty->getAs<TemplateSpecializationType>()) {
Template = dyn_cast_or_null<ClassTemplateDecl>(
TST->getTemplateName().getAsTemplateDecl());
Arguments = TST->getArgs();
}
if (!Template)
return false;
if (!StdInitializerList) {
// Haven't recognized std::initializer_list yet, maybe this is it.
CXXRecordDecl *TemplateClass = Template->getTemplatedDecl();
if (TemplateClass->getIdentifier() !=
&PP.getIdentifierTable().get("initializer_list") ||
!getStdNamespace()->InEnclosingNamespaceSetOf(
TemplateClass->getDeclContext()))
return false;
// This is a template called std::initializer_list, but is it the right
// template?
TemplateParameterList *Params = Template->getTemplateParameters();
if (Params->getMinRequiredArguments() != 1)
return false;
if (!isa<TemplateTypeParmDecl>(Params->getParam(0)))
return false;
// It's the right template.
StdInitializerList = Template;
}
if (Template->getCanonicalDecl() != StdInitializerList->getCanonicalDecl())
return false;
// This is an instance of std::initializer_list. Find the argument type.
if (Element)
*Element = Arguments[0].getAsType();
return true;
}
static ClassTemplateDecl *LookupStdInitializerList(Sema &S, SourceLocation Loc){
NamespaceDecl *Std = S.getStdNamespace();
if (!Std) {
S.Diag(Loc, diag::err_implied_std_initializer_list_not_found);
return nullptr;
}
LookupResult Result(S, &S.PP.getIdentifierTable().get("initializer_list"),
Loc, Sema::LookupOrdinaryName);
if (!S.LookupQualifiedName(Result, Std)) {
S.Diag(Loc, diag::err_implied_std_initializer_list_not_found);
return nullptr;
}
ClassTemplateDecl *Template = Result.getAsSingle<ClassTemplateDecl>();
if (!Template) {
Result.suppressDiagnostics();
// We found something weird. Complain about the first thing we found.
NamedDecl *Found = *Result.begin();
S.Diag(Found->getLocation(), diag::err_malformed_std_initializer_list);
return nullptr;
}
// We found some template called std::initializer_list. Now verify that it's
// correct.
TemplateParameterList *Params = Template->getTemplateParameters();
if (Params->getMinRequiredArguments() != 1 ||
!isa<TemplateTypeParmDecl>(Params->getParam(0))) {
S.Diag(Template->getLocation(), diag::err_malformed_std_initializer_list);
return nullptr;
}
return Template;
}
QualType Sema::BuildStdInitializerList(QualType Element, SourceLocation Loc) {
if (!StdInitializerList) {
StdInitializerList = LookupStdInitializerList(*this, Loc);
if (!StdInitializerList)
return QualType();
}
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(TemplateArgumentLoc(TemplateArgument(Element),
Context.getTrivialTypeSourceInfo(Element,
Loc)));
return Context.getCanonicalType(
CheckTemplateIdType(TemplateName(StdInitializerList), Loc, Args));
}
bool Sema::isInitListConstructor(const FunctionDecl *Ctor) {
// C++ [dcl.init.list]p2:
// A constructor is an initializer-list constructor if its first parameter
// is of type std::initializer_list<E> or reference to possibly cv-qualified
// std::initializer_list<E> for some type E, and either there are no other
// parameters or else all other parameters have default arguments.
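// For illustration:
//   struct X {
//     X(std::initializer_list<int>);           // initializer-list constructor
//     X(std::initializer_list<int>, int = 0);  // also one
//     X(std::initializer_list<int>, int);      // not one: no default argument
//   };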
if (Ctor->getNumParams() < 1 ||
(Ctor->getNumParams() > 1 && !Ctor->getParamDecl(1)->hasDefaultArg()))
return false;
QualType ArgType = Ctor->getParamDecl(0)->getType();
if (const ReferenceType *RT = ArgType->getAs<ReferenceType>())
ArgType = RT->getPointeeType().getUnqualifiedType();
return isStdInitializerList(ArgType, nullptr);
}
/// \brief Determine whether a using-directive is in a context where it will
/// apply in all contexts.
static bool IsUsingDirectiveInToplevelContext(DeclContext *CurContext) {
switch (CurContext->getDeclKind()) {
case Decl::TranslationUnit:
return true;
case Decl::LinkageSpec:
return IsUsingDirectiveInToplevelContext(CurContext->getParent());
default:
return false;
}
}
namespace {
// Callback to only accept typo corrections that are namespaces.
class NamespaceValidatorCCC : public CorrectionCandidateCallback {
public:
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (NamedDecl *ND = candidate.getCorrectionDecl())
return isa<NamespaceDecl>(ND) || isa<NamespaceAliasDecl>(ND);
return false;
}
};
}
static bool TryNamespaceTypoCorrection(Sema &S, LookupResult &R, Scope *Sc,
CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *Ident) {
R.clear();
if (TypoCorrection Corrected =
S.CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), Sc, &SS,
llvm::make_unique<NamespaceValidatorCCC>(),
Sema::CTK_ErrorRecovery)) {
if (DeclContext *DC = S.computeDeclContext(SS, false)) {
std::string CorrectedStr(Corrected.getAsString(S.getLangOpts()));
bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
Ident->getName().equals(CorrectedStr);
S.diagnoseTypo(Corrected,
S.PDiag(diag::err_using_directive_member_suggest)
<< Ident << DC << DroppedSpecifier << SS.getRange(),
S.PDiag(diag::note_namespace_defined_here));
} else {
S.diagnoseTypo(Corrected,
S.PDiag(diag::err_using_directive_suggest) << Ident,
S.PDiag(diag::note_namespace_defined_here));
}
R.addDecl(Corrected.getFoundDecl());
return true;
}
return false;
}
Decl *Sema::ActOnUsingDirective(Scope *S,
SourceLocation UsingLoc,
SourceLocation NamespcLoc,
CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *NamespcName,
AttributeList *AttrList) {
assert(!SS.isInvalid() && "Invalid CXXScopeSpec.");
assert(NamespcName && "Invalid NamespcName.");
assert(IdentLoc.isValid() && "Invalid NamespcName location.");
// This can only happen along a recovery path.
while (S->isTemplateParamScope())
S = S->getParent();
assert(S->getFlags() & Scope::DeclScope && "Invalid Scope.");
UsingDirectiveDecl *UDir = nullptr;
NestedNameSpecifier *Qualifier = nullptr;
if (SS.isSet())
Qualifier = SS.getScopeRep();
// Lookup namespace name.
LookupResult R(*this, NamespcName, IdentLoc, LookupNamespaceName);
LookupParsedName(R, S, &SS);
if (R.isAmbiguous())
return nullptr;
if (R.empty()) {
R.clear();
// Allow "using namespace std;" or "using namespace ::std;" even if
// "std" hasn't been defined yet, for GCC compatibility.
if ((!Qualifier || Qualifier->getKind() == NestedNameSpecifier::Global) &&
NamespcName->isStr("std")) {
Diag(IdentLoc, diag::ext_using_undefined_std);
R.addDecl(getOrCreateStdNamespace());
R.resolveKind();
}
// Otherwise, attempt typo correction.
else TryNamespaceTypoCorrection(*this, R, S, SS, IdentLoc, NamespcName);
}
if (!R.empty()) {
NamedDecl *Named = R.getRepresentativeDecl();
NamespaceDecl *NS = R.getAsSingle<NamespaceDecl>();
assert(NS && "expected namespace decl");
// The use of a nested name specifier may trigger deprecation warnings.
DiagnoseUseOfDecl(Named, IdentLoc);
// C++ [namespace.udir]p1:
// A using-directive specifies that the names in the nominated
// namespace can be used in the scope in which the
// using-directive appears after the using-directive. During
// unqualified name lookup (3.4.1), the names appear as if they
// were declared in the nearest enclosing namespace which
// contains both the using-directive and the nominated
// namespace. [Note: in this context, "contains" means "contains
// directly or indirectly". ]
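// For illustration:
//   namespace A {
//     namespace B { int i; }
//     using namespace B;
//     int j = i;  // OK: A::B::i appears as if declared in A
//   }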
// Find enclosing context containing both using-directive and
// nominated namespace.
DeclContext *CommonAncestor = cast<DeclContext>(NS);
while (CommonAncestor && !CommonAncestor->Encloses(CurContext))
CommonAncestor = CommonAncestor->getParent();
UDir = UsingDirectiveDecl::Create(Context, CurContext, UsingLoc, NamespcLoc,
SS.getWithLocInContext(Context),
IdentLoc, Named, CommonAncestor);
if (IsUsingDirectiveInToplevelContext(CurContext) &&
!SourceMgr.isInMainFile(SourceMgr.getExpansionLoc(IdentLoc))) {
Diag(IdentLoc, diag::warn_using_directive_in_header);
}
PushUsingDirective(S, UDir);
} else {
Diag(IdentLoc, diag::err_expected_namespace_name) << SS.getRange();
}
if (UDir)
ProcessDeclAttributeList(S, UDir, AttrList);
return UDir;
}
void Sema::PushUsingDirective(Scope *S, UsingDirectiveDecl *UDir) {
// If the scope has an associated entity and the using directive is at
// namespace or translation unit scope, add the UsingDirectiveDecl into
// its lookup structure so qualified name lookup can find it.
DeclContext *Ctx = S->getEntity();
if (Ctx && !Ctx->isFunctionOrMethod())
Ctx->addDecl(UDir);
else
// Otherwise, it is at block scope. The using-directives will affect lookup
// only to the end of the scope.
S->PushUsingDirective(UDir);
}
Decl *Sema::ActOnUsingDeclaration(Scope *S,
AccessSpecifier AS,
SourceLocation UsingLoc,
SourceLocation TypenameLoc,
CXXScopeSpec &SS,
UnqualifiedId &Name,
SourceLocation EllipsisLoc,
AttributeList *AttrList) {
assert(S->getFlags() & Scope::DeclScope && "Invalid Scope.");
if (SS.isEmpty()) {
Diag(Name.getLocStart(), diag::err_using_requires_qualname);
return nullptr;
}
switch (Name.getKind()) {
case UnqualifiedId::IK_ImplicitSelfParam:
case UnqualifiedId::IK_Identifier:
case UnqualifiedId::IK_OperatorFunctionId:
case UnqualifiedId::IK_LiteralOperatorId:
case UnqualifiedId::IK_ConversionFunctionId:
break;
case UnqualifiedId::IK_ConstructorName:
case UnqualifiedId::IK_ConstructorTemplateId:
// C++11 inheriting constructors.
Diag(Name.getLocStart(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_using_decl_constructor :
diag::err_using_decl_constructor)
<< SS.getRange();
if (getLangOpts().CPlusPlus11) break;
return nullptr;
case UnqualifiedId::IK_DestructorName:
Diag(Name.getLocStart(), diag::err_using_decl_destructor)
<< SS.getRange();
return nullptr;
case UnqualifiedId::IK_TemplateId:
Diag(Name.getLocStart(), diag::err_using_decl_template_id)
<< SourceRange(Name.TemplateId->LAngleLoc, Name.TemplateId->RAngleLoc);
return nullptr;
case UnqualifiedId::IK_DeductionGuideName:
llvm_unreachable("cannot parse qualified deduction guide name");
}
DeclarationNameInfo TargetNameInfo = GetNameFromUnqualifiedId(Name);
DeclarationName TargetName = TargetNameInfo.getName();
if (!TargetName)
return nullptr;
// Warn about access declarations.
if (UsingLoc.isInvalid()) {
Diag(Name.getLocStart(),
getLangOpts().CPlusPlus11 ? diag::err_access_decl
: diag::warn_access_decl_deprecated)
<< FixItHint::CreateInsertion(SS.getRange().getBegin(), "using ");
}
if (EllipsisLoc.isInvalid()) {
if (DiagnoseUnexpandedParameterPack(SS, UPPC_UsingDeclaration) ||
DiagnoseUnexpandedParameterPack(TargetNameInfo, UPPC_UsingDeclaration))
return nullptr;
} else {
if (!SS.getScopeRep()->containsUnexpandedParameterPack() &&
!TargetNameInfo.containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< SourceRange(SS.getBeginLoc(), TargetNameInfo.getEndLoc());
EllipsisLoc = SourceLocation();
}
}
NamedDecl *UD =
BuildUsingDeclaration(S, AS, UsingLoc, TypenameLoc.isValid(), TypenameLoc,
SS, TargetNameInfo, EllipsisLoc, AttrList,
/*IsInstantiation*/false);
if (UD)
PushOnScopeChains(UD, S, /*AddToContext*/ false);
return UD;
}
/// \brief Determine whether a using declaration considers the given
/// declarations as "equivalent", e.g., if they are redeclarations of
/// the same entity or are both typedefs of the same type.
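/// For illustration: two declarations 'typedef int I;' appearing in different
/// scopes are considered equivalent here, since both name the same type.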
static bool
IsEquivalentForUsingDecl(ASTContext &Context, NamedDecl *D1, NamedDecl *D2) {
if (D1->getCanonicalDecl() == D2->getCanonicalDecl())
return true;
if (TypedefNameDecl *TD1 = dyn_cast<TypedefNameDecl>(D1))
if (TypedefNameDecl *TD2 = dyn_cast<TypedefNameDecl>(D2))
return Context.hasSameType(TD1->getUnderlyingType(),
TD2->getUnderlyingType());
return false;
}
/// Determines whether to create a using shadow decl for a particular
/// decl, given the set of decls existing prior to this using lookup.
bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig,
const LookupResult &Previous,
UsingShadowDecl *&PrevShadow) {
// Diagnose finding a decl which is not from a base class of the
// current class. We do this now because there are cases where this
// function will silently decide not to build a shadow decl, which
// will pre-empt further diagnostics.
//
// We don't need to do this in C++11 because we do the check once on
// the qualifier.
//
// FIXME: diagnose the following if we care enough:
// struct A { int foo; };
// struct B : A { using A::foo; };
// template <class T> struct C : A {};
// template <class T> struct D : C<T> { using B::foo; } // <---
// This is invalid (during instantiation) in C++03 because B::foo
// resolves to the using decl in B, which is not a base class of D<T>.
// We can't diagnose it immediately because C<T> is an unknown
// specialization. The UsingShadowDecl in D<T> then points directly
// to A::foo, which will look well-formed when we instantiate.
// The right solution is to not collapse the shadow-decl chain.
if (!getLangOpts().CPlusPlus11 && CurContext->isRecord()) {
DeclContext *OrigDC = Orig->getDeclContext();
// Handle enums and anonymous structs.
if (isa<EnumDecl>(OrigDC)) OrigDC = OrigDC->getParent();
CXXRecordDecl *OrigRec = cast<CXXRecordDecl>(OrigDC);
while (OrigRec->isAnonymousStructOrUnion())
OrigRec = cast<CXXRecordDecl>(OrigRec->getDeclContext());
if (cast<CXXRecordDecl>(CurContext)->isProvablyNotDerivedFrom(OrigRec)) {
if (OrigDC == CurContext) {
Diag(Using->getLocation(),
diag::err_using_decl_nested_name_specifier_is_current_class)
<< Using->getQualifierLoc().getSourceRange();
Diag(Orig->getLocation(), diag::note_using_decl_target);
Using->setInvalidDecl();
return true;
}
Diag(Using->getQualifierLoc().getBeginLoc(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< Using->getQualifier()
<< cast<CXXRecordDecl>(CurContext)
<< Using->getQualifierLoc().getSourceRange();
Diag(Orig->getLocation(), diag::note_using_decl_target);
Using->setInvalidDecl();
return true;
}
}
if (Previous.empty()) return false;
NamedDecl *Target = Orig;
if (isa<UsingShadowDecl>(Target))
Target = cast<UsingShadowDecl>(Target)->getTargetDecl();
// If the target happens to be one of the previous declarations, we
// don't have a conflict.
//
// FIXME: but we might be increasing its access, in which case we
// should redeclare it.
NamedDecl *NonTag = nullptr, *Tag = nullptr;
bool FoundEquivalentDecl = false;
for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
I != E; ++I) {
NamedDecl *D = (*I)->getUnderlyingDecl();
// We can have UsingDecls in our Previous results because we use the same
// LookupResult for checking whether the UsingDecl itself is a valid
// redeclaration.
if (isa<UsingDecl>(D) || isa<UsingPackDecl>(D))
continue;
if (IsEquivalentForUsingDecl(Context, D, Target)) {
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(*I))
PrevShadow = Shadow;
FoundEquivalentDecl = true;
} else if (isEquivalentInternalLinkageDeclaration(D, Target)) {
// We don't conflict with an existing using shadow decl of an equivalent
// declaration, but we're not a redeclaration of it.
FoundEquivalentDecl = true;
}
if (isVisible(D))
(isa<TagDecl>(D) ? Tag : NonTag) = D;
}
if (FoundEquivalentDecl)
return false;
if (FunctionDecl *FD = Target->getAsFunction()) {
NamedDecl *OldDecl = nullptr;
switch (CheckOverload(nullptr, FD, Previous, OldDecl,
/*IsForUsingDecl*/ true)) {
case Ovl_Overload:
return false;
case Ovl_NonFunction:
Diag(Using->getLocation(), diag::err_using_decl_conflict);
break;
// We found a decl with the exact signature.
case Ovl_Match:
// If we're in a record, we want to hide the target, so we
// return true (without a diagnostic) to tell the caller not to
// build a shadow decl.
if (CurContext->isRecord())
return true;
// If we're not in a record, this is an error.
Diag(Using->getLocation(), diag::err_using_decl_conflict);
break;
}
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(OldDecl->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
// Target is not a function.
if (isa<TagDecl>(Target)) {
// No conflict between a tag and a non-tag.
if (!Tag) return false;
Diag(Using->getLocation(), diag::err_using_decl_conflict);
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(Tag->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
// No conflict between a tag and a non-tag.
if (!NonTag) return false;
Diag(Using->getLocation(), diag::err_using_decl_conflict);
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(NonTag->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
/// Determine whether a direct base class is a virtual base class.
static bool isVirtualDirectBase(CXXRecordDecl *Derived, CXXRecordDecl *Base) {
if (!Derived->getNumVBases())
return false;
for (auto &B : Derived->bases())
if (B.getType()->getAsCXXRecordDecl() == Base)
return B.isVirtual();
llvm_unreachable("not a direct base class");
}
/// Builds a shadow declaration corresponding to a 'using' declaration.
UsingShadowDecl *Sema::BuildUsingShadowDecl(Scope *S,
UsingDecl *UD,
NamedDecl *Orig,
UsingShadowDecl *PrevDecl) {
// If we resolved to another shadow declaration, just coalesce them.
NamedDecl *Target = Orig;
if (isa<UsingShadowDecl>(Target)) {
Target = cast<UsingShadowDecl>(Target)->getTargetDecl();
assert(!isa<UsingShadowDecl>(Target) && "nested shadow declaration");
}
NamedDecl *NonTemplateTarget = Target;
if (auto *TargetTD = dyn_cast<TemplateDecl>(Target))
NonTemplateTarget = TargetTD->getTemplatedDecl();
UsingShadowDecl *Shadow;
if (isa<CXXConstructorDecl>(NonTemplateTarget)) {
bool IsVirtualBase =
isVirtualDirectBase(cast<CXXRecordDecl>(CurContext),
UD->getQualifier()->getAsRecordDecl());
Shadow = ConstructorUsingShadowDecl::Create(
Context, CurContext, UD->getLocation(), UD, Orig, IsVirtualBase);
} else {
Shadow = UsingShadowDecl::Create(Context, CurContext, UD->getLocation(), UD,
Target);
}
UD->addShadowDecl(Shadow);
Shadow->setAccess(UD->getAccess());
if (Orig->isInvalidDecl() || UD->isInvalidDecl())
Shadow->setInvalidDecl();
Shadow->setPreviousDecl(PrevDecl);
if (S)
PushOnScopeChains(Shadow, S);
else
CurContext->addDecl(Shadow);
return Shadow;
}
/// Hides a using shadow declaration. This is required by the current
/// using-decl implementation when a resolvable using declaration in a
/// class is followed by a declaration which would hide or override
/// one or more of the using decl's targets; for example:
///
/// struct Base { void foo(int); };
/// struct Derived : Base {
/// using Base::foo;
/// void foo(int);
/// };
///
/// The governing language is C++03 [namespace.udecl]p12:
///
/// When a using-declaration brings names from a base class into a
/// derived class scope, member functions in the derived class
/// override and/or hide member functions with the same name and
/// parameter types in a base class (rather than conflicting).
///
/// There are two ways to implement this:
/// (1) optimistically create shadow decls when they're not hidden
/// by existing declarations, or
/// (2) don't create any shadow decls (or at least don't make them
/// visible) until we've fully parsed/instantiated the class.
/// The problem with (1) is that we might have to retroactively remove
/// a shadow decl, which requires several O(n) operations because the
/// decl structures are (very reasonably) not designed for removal.
/// (2) avoids this but is very fiddly and phase-dependent.
void Sema::HideUsingShadowDecl(Scope *S, UsingShadowDecl *Shadow) {
if (Shadow->getDeclName().getNameKind() ==
DeclarationName::CXXConversionFunctionName)
cast<CXXRecordDecl>(Shadow->getDeclContext())->removeConversion(Shadow);
// Remove it from the DeclContext...
Shadow->getDeclContext()->removeDecl(Shadow);
// ...and the scope, if applicable...
if (S) {
S->RemoveDecl(Shadow);
IdResolver.RemoveDecl(Shadow);
}
// ...and the using decl.
Shadow->getUsingDecl()->removeShadowDecl(Shadow);
// TODO: complain somehow if Shadow was used. It shouldn't
// be possible for this to happen, because...?
}
/// Find the base specifier for a base class with the given type.
static CXXBaseSpecifier *findDirectBaseWithType(CXXRecordDecl *Derived,
QualType DesiredBase,
bool &AnyDependentBases) {
// Check whether the named type is a direct base class.
CanQualType CanonicalDesiredBase = DesiredBase->getCanonicalTypeUnqualified();
for (auto &Base : Derived->bases()) {
CanQualType BaseType = Base.getType()->getCanonicalTypeUnqualified();
if (CanonicalDesiredBase == BaseType)
return &Base;
if (BaseType->isDependentType())
AnyDependentBases = true;
}
return nullptr;
}
namespace {
class UsingValidatorCCC : public CorrectionCandidateCallback {
public:
UsingValidatorCCC(bool HasTypenameKeyword, bool IsInstantiation,
NestedNameSpecifier *NNS, CXXRecordDecl *RequireMemberOf)
: HasTypenameKeyword(HasTypenameKeyword),
IsInstantiation(IsInstantiation), OldNNS(NNS),
RequireMemberOf(RequireMemberOf) {}
bool ValidateCandidate(const TypoCorrection &Candidate) override {
NamedDecl *ND = Candidate.getCorrectionDecl();
// Keywords are not valid here.
if (!ND || isa<NamespaceDecl>(ND))
return false;
// Completely unqualified names are invalid for a 'using' declaration.
if (Candidate.WillReplaceSpecifier() && !Candidate.getCorrectionSpecifier())
return false;
// FIXME: Don't correct to a name that CheckUsingDeclRedeclaration would
// reject.
if (RequireMemberOf) {
auto *FoundRecord = dyn_cast<CXXRecordDecl>(ND);
if (FoundRecord && FoundRecord->isInjectedClassName()) {
// No-one ever wants a using-declaration to name an injected-class-name
// of a base class, unless they're declaring an inheriting constructor.
ASTContext &Ctx = ND->getASTContext();
if (!Ctx.getLangOpts().CPlusPlus11)
return false;
QualType FoundType = Ctx.getRecordType(FoundRecord);
// Check that the injected-class-name is named as a member of its own
// type; we don't want to suggest 'using Derived::Base;', since that
// means something else.
NestedNameSpecifier *Specifier =
Candidate.WillReplaceSpecifier()
? Candidate.getCorrectionSpecifier()
: OldNNS;
if (!Specifier->getAsType() ||
!Ctx.hasSameType(QualType(Specifier->getAsType(), 0), FoundType))
return false;
// Check that this inheriting constructor declaration actually names a
// direct base class of the current class.
bool AnyDependentBases = false;
if (!findDirectBaseWithType(RequireMemberOf,
Ctx.getRecordType(FoundRecord),
AnyDependentBases) &&
!AnyDependentBases)
return false;
} else {
auto *RD = dyn_cast<CXXRecordDecl>(ND->getDeclContext());
if (!RD || RequireMemberOf->isProvablyNotDerivedFrom(RD))
return false;
// FIXME: Check that the base class member is accessible?
}
} else {
auto *FoundRecord = dyn_cast<CXXRecordDecl>(ND);
if (FoundRecord && FoundRecord->isInjectedClassName())
return false;
}
if (isa<TypeDecl>(ND))
return HasTypenameKeyword || !IsInstantiation;
return !HasTypenameKeyword;
}
private:
bool HasTypenameKeyword;
bool IsInstantiation;
NestedNameSpecifier *OldNNS;
CXXRecordDecl *RequireMemberOf;
};
} // end anonymous namespace
/// Builds a using declaration.
///
/// \param IsInstantiation - Whether this call arises from an
/// instantiation of an unresolved using declaration. We treat
/// the lookup differently for these declarations.
NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
SourceLocation UsingLoc,
bool HasTypenameKeyword,
SourceLocation TypenameLoc,
CXXScopeSpec &SS,
DeclarationNameInfo NameInfo,
SourceLocation EllipsisLoc,
AttributeList *AttrList,
bool IsInstantiation) {
assert(!SS.isInvalid() && "Invalid CXXScopeSpec.");
SourceLocation IdentLoc = NameInfo.getLoc();
assert(IdentLoc.isValid() && "Invalid TargetName location.");
// FIXME: We ignore attributes for now.
// For an inheriting constructor declaration, the name of the using
// declaration is the name of a constructor in this class, not in the
// base class.
DeclarationNameInfo UsingName = NameInfo;
if (UsingName.getName().getNameKind() == DeclarationName::CXXConstructorName)
if (auto *RD = dyn_cast<CXXRecordDecl>(CurContext))
UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(Context.getRecordType(RD))));
// Do the redeclaration lookup in the current scope.
LookupResult Previous(*this, UsingName, LookupUsingDeclName,
ForRedeclaration);
Previous.setHideTags(false);
if (S) {
LookupName(Previous, S);
// It is really dumb that we have to do this.
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (!isDeclInScope(D, CurContext, S))
F.erase();
// If we found a local extern declaration that's not ordinarily visible,
// and this declaration is being added to a non-block scope, ignore it.
// We're only checking for scope conflicts here, not also for violations
// of the linkage rules.
else if (!CurContext->isFunctionOrMethod() && D->isLocalExternDecl() &&
!(D->getIdentifierNamespace() & Decl::IDNS_Ordinary))
F.erase();
}
F.done();
} else {
assert(IsInstantiation && "no scope in non-instantiation");
if (CurContext->isRecord())
LookupQualifiedName(Previous, CurContext);
else {
// No redeclaration check is needed here; in non-member contexts we
// diagnosed all possible conflicts with other using-declarations when
// building the template:
//
// For a dependent non-type using declaration, the only valid case is
// if we instantiate to a single enumerator. We check for conflicts
// between shadow declarations we introduce, and we check in the template
// definition for conflicts between a non-type using declaration and any
// other declaration, which together covers all cases.
//
// A dependent typename using declaration will never successfully
// instantiate, since it will always name a class member, so we reject
// that in the template definition.
}
}
// Check for invalid redeclarations.
if (CheckUsingDeclRedeclaration(UsingLoc, HasTypenameKeyword,
SS, IdentLoc, Previous))
return nullptr;
// Check for bad qualifiers.
if (CheckUsingDeclQualifier(UsingLoc, HasTypenameKeyword, SS, NameInfo,
IdentLoc))
return nullptr;
DeclContext *LookupContext = computeDeclContext(SS);
NamedDecl *D;
NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context);
if (!LookupContext || EllipsisLoc.isValid()) {
if (HasTypenameKeyword) {
// FIXME: not all declaration name kinds are legal here
D = UnresolvedUsingTypenameDecl::Create(Context, CurContext,
UsingLoc, TypenameLoc,
QualifierLoc,
IdentLoc, NameInfo.getName(),
EllipsisLoc);
} else {
D = UnresolvedUsingValueDecl::Create(Context, CurContext, UsingLoc,
QualifierLoc, NameInfo, EllipsisLoc);
}
D->setAccess(AS);
CurContext->addDecl(D);
return D;
}
auto Build = [&](bool Invalid) {
UsingDecl *UD =
UsingDecl::Create(Context, CurContext, UsingLoc, QualifierLoc,
UsingName, HasTypenameKeyword);
UD->setAccess(AS);
CurContext->addDecl(UD);
UD->setInvalidDecl(Invalid);
return UD;
};
auto BuildInvalid = [&]{ return Build(true); };
auto BuildValid = [&]{ return Build(false); };
if (RequireCompleteDeclContext(SS, LookupContext))
return BuildInvalid();
// Look up the target name.
LookupResult R(*this, NameInfo, LookupOrdinaryName);
// Unlike most lookups, we don't always want to hide tag
// declarations: tag names are visible through the using declaration
// even if hidden by ordinary names, *except* in a dependent context
// where it's important for the sanity of two-phase lookup.
if (!IsInstantiation)
R.setHideTags(false);
// For the purposes of this lookup, we have a base object type
// equal to that of the current context.
if (CurContext->isRecord()) {
R.setBaseObjectType(
Context.getTypeDeclType(cast<CXXRecordDecl>(CurContext)));
}
LookupQualifiedName(R, LookupContext);
// Try to correct typos if possible. If constructor name lookup finds no
// results, that means the named class has no explicit constructors, and we
// suppressed declaring implicit ones (probably because it's dependent or
// invalid).
if (R.empty() &&
NameInfo.getName().getNameKind() != DeclarationName::CXXConstructorName) {
// HACK: Work around a bug in libstdc++'s detection of ::gets. Sometimes
// it will believe that glibc provides a ::gets in cases where it does not,
// and will try to pull it into namespace std with a using-declaration.
// Just ignore the using-declaration in that case.
auto *II = NameInfo.getName().getAsIdentifierInfo();
if (getLangOpts().CPlusPlus14 && II && II->isStr("gets") &&
CurContext->isStdNamespace() &&
isa<TranslationUnitDecl>(LookupContext) &&
getSourceManager().isInSystemHeader(UsingLoc))
return nullptr;
if (TypoCorrection Corrected = CorrectTypo(
R.getLookupNameInfo(), R.getLookupKind(), S, &SS,
llvm::make_unique<UsingValidatorCCC>(
HasTypenameKeyword, IsInstantiation, SS.getScopeRep(),
dyn_cast<CXXRecordDecl>(CurContext)),
CTK_ErrorRecovery)) {
// We reject candidates where DroppedSpecifier == true, hence the
// literal '0' below.
diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest)
<< NameInfo.getName() << LookupContext << 0
<< SS.getRange());
// If we picked a correction with no attached Decl we can't do anything
// useful with it, bail out.
NamedDecl *ND = Corrected.getCorrectionDecl();
if (!ND)
return BuildInvalid();
// If we corrected to an inheriting constructor, handle it as one.
auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (RD && RD->isInjectedClassName()) {
// The parent of the injected class name is the class itself.
RD = cast<CXXRecordDecl>(RD->getParent());
// Fix up the information we'll use to build the using declaration.
if (Corrected.WillReplaceSpecifier()) {
NestedNameSpecifierLocBuilder Builder;
Builder.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
QualifierLoc.getSourceRange());
QualifierLoc = Builder.getWithLocInContext(Context);
}
// In this case, the name we introduce is the name of a derived class
// constructor.
auto *CurClass = cast<CXXRecordDecl>(CurContext);
UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(Context.getRecordType(CurClass))));
UsingName.setNamedTypeInfo(nullptr);
for (auto *Ctor : LookupConstructors(RD))
R.addDecl(Ctor);
R.resolveKind();
} else {
// FIXME: Pick up all the declarations if we found an overloaded
// function.
UsingName.setName(ND->getDeclName());
R.addDecl(ND);
}
} else {
Diag(IdentLoc, diag::err_no_member)
<< NameInfo.getName() << LookupContext << SS.getRange();
return BuildInvalid();
}
}
if (R.isAmbiguous())
return BuildInvalid();
if (HasTypenameKeyword) {
// If we asked for a typename and got a non-type decl, error out.
if (!R.getAsSingle<TypeDecl>()) {
Diag(IdentLoc, diag::err_using_typename_non_type);
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I)
Diag((*I)->getUnderlyingDecl()->getLocation(),
diag::note_using_decl_target);
return BuildInvalid();
}
} else {
// If we asked for a non-typename and we got a type, error out,
// but only if this is an instantiation of an unresolved using
// decl. Otherwise just silently find the type name.
if (IsInstantiation && R.getAsSingle<TypeDecl>()) {
Diag(IdentLoc, diag::err_using_dependent_value_is_type);
Diag(R.getFoundDecl()->getLocation(), diag::note_using_decl_target);
return BuildInvalid();
}
}
// C++14 [namespace.udecl]p6:
// A using-declaration shall not name a namespace.
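// For illustration:
//   namespace N { namespace M { } }
//   using N::M;  // error: a using-declaration cannot name a namespace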
if (R.getAsSingle<NamespaceDecl>()) {
Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_namespace)
<< SS.getRange();
return BuildInvalid();
}
// C++14 [namespace.udecl]p7:
// A using-declaration shall not name a scoped enumerator.
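// For illustration:
//   enum class E { e };
//   using E::e;  // error in C++14: 'e' is a scoped enumerator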
if (auto *ED = R.getAsSingle<EnumConstantDecl>()) {
if (cast<EnumDecl>(ED->getDeclContext())->isScoped()) {
Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_scoped_enum)
<< SS.getRange();
return BuildInvalid();
}
}
UsingDecl *UD = BuildValid();
// Some additional rules apply to inheriting constructors.
if (UsingName.getName().getNameKind() ==
DeclarationName::CXXConstructorName) {
// Suppress access diagnostics; the access check is instead performed at the
// point of use for an inheriting constructor.
R.suppressDiagnostics();
if (CheckInheritingConstructorUsingDecl(UD))
return UD;
}
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) {
UsingShadowDecl *PrevDecl = nullptr;
if (!CheckUsingShadowDecl(UD, *I, Previous, PrevDecl))
BuildUsingShadowDecl(S, UD, *I, PrevDecl);
}
return UD;
}
NamedDecl *Sema::BuildUsingPackDecl(NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> Expansions) {
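// For example (illustrative), this models a C++17 pack expansion such as
//   template <typename ...Ts> struct Overloaded : Ts... {
//     using Ts::operator()...;
//   };
// which instantiates to one expansion per element of the pack.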
assert(isa<UnresolvedUsingValueDecl>(InstantiatedFrom) ||
isa<UnresolvedUsingTypenameDecl>(InstantiatedFrom) ||
isa<UsingPackDecl>(InstantiatedFrom));
auto *UPD =
UsingPackDecl::Create(Context, CurContext, InstantiatedFrom, Expansions);
UPD->setAccess(InstantiatedFrom->getAccess());
CurContext->addDecl(UPD);
return UPD;
}
/// Additional checks for a using declaration referring to a constructor name.
bool Sema::CheckInheritingConstructorUsingDecl(UsingDecl *UD) {
assert(!UD->hasTypename() && "expecting a constructor name");
const Type *SourceType = UD->getQualifier()->getAsType();
assert(SourceType &&
"Using decl naming constructor doesn't have type in scope spec.");
CXXRecordDecl *TargetClass = cast<CXXRecordDecl>(CurContext);
// Check whether the named type is a direct base class.
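// For example (illustrative): given 'struct A { A(int); };' and
// 'struct B : A {};', the declaration 'struct C : B { using A::A; };' is
// rejected because A is a base of C but not a direct base.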
bool AnyDependentBases = false;
auto *Base = findDirectBaseWithType(TargetClass, QualType(SourceType, 0),
AnyDependentBases);
if (!Base && !AnyDependentBases) {
Diag(UD->getUsingLoc(),
diag::err_using_decl_constructor_not_in_direct_base)
<< UD->getNameInfo().getSourceRange()
<< QualType(SourceType, 0) << TargetClass;
UD->setInvalidDecl();
return true;
}
if (Base)
Base->setInheritConstructors();
return false;
}
/// Checks that the given using declaration is not an invalid
/// redeclaration. Note that this is checking only for the using decl
/// itself, not for any ill-formedness among the UsingShadowDecls.
bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
bool HasTypenameKeyword,
const CXXScopeSpec &SS,
SourceLocation NameLoc,
const LookupResult &Prev) {
NestedNameSpecifier *Qual = SS.getScopeRep();
// C++03 [namespace.udecl]p8:
// C++0x [namespace.udecl]p10:
// A using-declaration is a declaration and can therefore be used
// repeatedly where (and only where) multiple declarations are
// allowed.
//
// That's in non-member contexts.
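// For example: at namespace scope 'using N::x;' may be written twice, the
// second being a harmless redeclaration, whereas within a class the same
// using-declaration may appear only once.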
if (!CurContext->getRedeclContext()->isRecord()) {
// A dependent qualifier outside a class can only ever resolve to an
// enumeration type. Therefore it conflicts with any other non-type
// declaration in the same scope.
// FIXME: How should we check for dependent type-type conflicts at block
// scope?
if (Qual->isDependent() && !HasTypenameKeyword) {
for (auto *D : Prev) {
if (!isa<TypeDecl>(D) && !isa<UsingDecl>(D) && !isa<UsingPackDecl>(D)) {
bool OldCouldBeEnumerator =
isa<UnresolvedUsingValueDecl>(D) || isa<EnumConstantDecl>(D);
Diag(NameLoc,
OldCouldBeEnumerator ? diag::err_redefinition
: diag::err_redefinition_different_kind)
<< Prev.getLookupName();
Diag(D->getLocation(), diag::note_previous_definition);
return true;
}
}
}
return false;
}
for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) {
NamedDecl *D = *I;
bool DTypename;
NestedNameSpecifier *DQual;
if (UsingDecl *UD = dyn_cast<UsingDecl>(D)) {
DTypename = UD->hasTypename();
DQual = UD->getQualifier();
} else if (UnresolvedUsingValueDecl *UD
= dyn_cast<UnresolvedUsingValueDecl>(D)) {
DTypename = false;
DQual = UD->getQualifier();
} else if (UnresolvedUsingTypenameDecl *UD
= dyn_cast<UnresolvedUsingTypenameDecl>(D)) {
DTypename = true;
DQual = UD->getQualifier();
} else continue;
// using decls differ if one says 'typename' and the other doesn't.
// FIXME: non-dependent using decls?
if (HasTypenameKeyword != DTypename) continue;
// using decls differ if they name different scopes (but note that
// template instantiation can cause this check to trigger when it
// didn't before instantiation).
if (Context.getCanonicalNestedNameSpecifier(Qual) !=
Context.getCanonicalNestedNameSpecifier(DQual))
continue;
Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange();
Diag(D->getLocation(), diag::note_using_decl) << 1;
return true;
}
return false;
}
/// Checks that the given nested-name qualifier used in a using decl
/// in the current context is appropriately related to the current
/// scope. If an error is found, diagnoses it and returns true.
bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc,
bool HasTypename,
const CXXScopeSpec &SS,
const DeclarationNameInfo &NameInfo,
SourceLocation NameLoc) {
DeclContext *NamedContext = computeDeclContext(SS);
if (!CurContext->isRecord()) {
// C++03 [namespace.udecl]p3:
// C++0x [namespace.udecl]p8:
// A using-declaration for a class member shall be a member-declaration.
// If we weren't able to compute a valid scope, it might validly be a
// dependent class scope or the scope of a dependent unscoped enumeration.
// If we have a 'typename' keyword, the scope must resolve to a class type.
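// For example (illustrative): given 'struct S { static int x; };', the
// namespace-scope declaration 'using S::x;' is ill-formed; the workarounds
// suggested below (e.g. 'auto &x = S::x;' in C++11) have the same effect.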
if ((HasTypename && !NamedContext) ||
(NamedContext && NamedContext->getRedeclContext()->isRecord())) {
auto *RD = NamedContext
? cast<CXXRecordDecl>(NamedContext->getRedeclContext())
: nullptr;
if (RD && RequireCompleteDeclContext(const_cast<CXXScopeSpec&>(SS), RD))
RD = nullptr;
Diag(NameLoc, diag::err_using_decl_can_not_refer_to_class_member)
<< SS.getRange();
// If we have a complete, non-dependent source type, try to suggest a
// way to get the same effect.
if (!RD)
return true;
// Find what this using-declaration was referring to.
LookupResult R(*this, NameInfo, LookupOrdinaryName);
R.setHideTags(false);
R.suppressDiagnostics();
LookupQualifiedName(R, RD);
if (R.getAsSingle<TypeDecl>()) {
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'using Y = X::Y;'.
Diag(SS.getBeginLoc(), diag::note_using_decl_class_member_workaround)
<< 0 // alias declaration
<< FixItHint::CreateInsertion(SS.getBeginLoc(),
NameInfo.getName().getAsString() +
" = ");
} else {
// Convert 'using X::Y;' to 'typedef X::Y Y;'.
SourceLocation InsertLoc =
getLocForEndOfToken(NameInfo.getLocEnd());
Diag(InsertLoc, diag::note_using_decl_class_member_workaround)
<< 1 // typedef declaration
<< FixItHint::CreateReplacement(UsingLoc, "typedef")
<< FixItHint::CreateInsertion(
InsertLoc, " " + NameInfo.getName().getAsString());
}
} else if (R.getAsSingle<VarDecl>()) {
// Don't provide a fixit outside C++11 mode; we don't want to suggest
// repeating the type of the static data member here.
FixItHint FixIt;
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'auto &Y = X::Y;'.
FixIt = FixItHint::CreateReplacement(
UsingLoc, "auto &" + NameInfo.getName().getAsString() + " = ");
}
Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
<< 2 // reference declaration
<< FixIt;
} else if (R.getAsSingle<EnumConstantDecl>()) {
// Don't provide a fixit outside C++11 mode; we don't want to suggest
// repeating the type of the enumeration here, and we can't do so if
// the type is anonymous.
FixItHint FixIt;
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'auto &Y = X::Y;'.
FixIt = FixItHint::CreateReplacement(
UsingLoc,
"constexpr auto " + NameInfo.getName().getAsString() + " = ");
}
Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
<< (getLangOpts().CPlusPlus11 ? 4 : 3) // const[expr] variable
<< FixIt;
}
return true;
}
// Otherwise, this might be valid.
return false;
}
// The current scope is a record.
// If the named context is dependent, we can't decide much.
if (!NamedContext) {
// FIXME: in C++0x, we can diagnose if we can prove that the
// nested-name-specifier does not refer to a base class, which is
// still possible in some cases.
// Otherwise we have to conservatively report that things might be
// okay.
return false;
}
if (!NamedContext->isRecord()) {
// Ideally this would point at the last name in the specifier,
// but we don't have that level of source info.
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_class)
<< SS.getScopeRep() << SS.getRange();
return true;
}
if (!NamedContext->isDependentContext() &&
RequireCompleteDeclContext(const_cast<CXXScopeSpec&>(SS), NamedContext))
return true;
if (getLangOpts().CPlusPlus11) {
// C++11 [namespace.udecl]p3:
// In a using-declaration used as a member-declaration, the
// nested-name-specifier shall name a base class of the class
// being defined.
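// For example (illustrative): 'struct A {}; struct B { int y; };
// struct C : A { using B::y; };' is rejected because B is not a base of C.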
if (cast<CXXRecordDecl>(CurContext)->isProvablyNotDerivedFrom(
cast<CXXRecordDecl>(NamedContext))) {
if (CurContext == NamedContext) {
Diag(NameLoc,
diag::err_using_decl_nested_name_specifier_is_current_class)
<< SS.getRange();
return true;
}
if (!cast<CXXRecordDecl>(NamedContext)->isInvalidDecl()) {
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< SS.getScopeRep()
<< cast<CXXRecordDecl>(CurContext)
<< SS.getRange();
}
return true;
}
return false;
}
// C++03 [namespace.udecl]p4:
// A using-declaration used as a member-declaration shall refer
// to a member of a base class of the class being defined [etc.].
// Salient point: SS doesn't have to name a base class as long as
// lookup only finds members from base classes. Therefore we can
// diagnose here only if we can prove that that can't happen,
// i.e. if the class hierarchies provably don't intersect.
// TODO: it would be nice if "definitely valid" results were cached
// in the UsingDecl and UsingShadowDecl so that these checks didn't
// need to be repeated.
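// For example (illustrative): 'struct A { int x; }; struct C { using A::x; };'
// is diagnosed even under this rule, because the hierarchies of A and C
// provably do not intersect.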
llvm::SmallPtrSet<const CXXRecordDecl *, 4> Bases;
auto Collect = [&Bases](const CXXRecordDecl *Base) {
Bases.insert(Base);
return true;
};
// Collect all bases. Return false if we find a dependent base.
if (!cast<CXXRecordDecl>(CurContext)->forallBases(Collect))
return false;
// Returns true if the given base is not one of the accumulated base
// classes, i.e. the two hierarchies do not meet at this base.
auto IsNotBase = [&Bases](const CXXRecordDecl *Base) {
return !Bases.count(Base);
};
// Return false if the class has a dependent base or if it or one
// of its bases is present in the base set of the current context.
if (Bases.count(cast<CXXRecordDecl>(NamedContext)) ||
!cast<CXXRecordDecl>(NamedContext)->forallBases(IsNotBase))
return false;
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< SS.getScopeRep()
<< cast<CXXRecordDecl>(CurContext)
<< SS.getRange();
return true;
}
Decl *Sema::ActOnAliasDeclaration(Scope *S,
AccessSpecifier AS,
MultiTemplateParamsArg TemplateParamLists,
SourceLocation UsingLoc,
UnqualifiedId &Name,
AttributeList *AttrList,
TypeResult Type,
Decl *DeclFromDeclSpec) {
// Skip up to the relevant declaration scope.
while (S->isTemplateParamScope())
S = S->getParent();
assert((S->getFlags() & Scope::DeclScope) &&
"got alias-declaration outside of declaration scope");
if (Type.isInvalid())
return nullptr;
bool Invalid = false;
DeclarationNameInfo NameInfo = GetNameFromUnqualifiedId(Name);
TypeSourceInfo *TInfo = nullptr;
GetTypeFromParser(Type.get(), &TInfo);
if (DiagnoseClassNameShadow(CurContext, NameInfo))
return nullptr;
if (DiagnoseUnexpandedParameterPack(Name.StartLocation, TInfo,
UPPC_DeclarationType)) {
Invalid = true;
TInfo = Context.getTrivialTypeSourceInfo(Context.IntTy,
TInfo->getTypeLoc().getBeginLoc());
}
LookupResult Previous(*this, NameInfo, LookupOrdinaryName, ForRedeclaration);
LookupName(Previous, S);
// Warn about shadowing the name of a template parameter.
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(Name.StartLocation,Previous.getFoundDecl());
Previous.clear();
}
assert(Name.Kind == UnqualifiedId::IK_Identifier &&
"name in alias declaration must be an identifier");
TypeAliasDecl *NewTD = TypeAliasDecl::Create(Context, CurContext, UsingLoc,
Name.StartLocation,
Name.Identifier, TInfo);
NewTD->setAccess(AS);
if (Invalid)
NewTD->setInvalidDecl();
ProcessDeclAttributeList(S, NewTD, AttrList);
AddPragmaAttributes(S, NewTD);
CheckTypedefForVariablyModifiedType(S, NewTD);
Invalid |= NewTD->isInvalidDecl();
bool Redeclaration = false;
NamedDecl *NewND;
if (TemplateParamLists.size()) {
TypeAliasTemplateDecl *OldDecl = nullptr;
TemplateParameterList *OldTemplateParams = nullptr;
if (TemplateParamLists.size() != 1) {
Diag(UsingLoc, diag::err_alias_template_extra_headers)
<< SourceRange(TemplateParamLists[1]->getTemplateLoc(),
TemplateParamLists[TemplateParamLists.size()-1]->getRAngleLoc());
}
TemplateParameterList *TemplateParams = TemplateParamLists[0];
// Check that we can declare a template here.
if (CheckTemplateDeclScope(S, TemplateParams))
return nullptr;
// Only consider previous declarations in the same scope.
FilterLookupForScope(Previous, CurContext, S, /*ConsiderLinkage*/false,
/*ExplicitInstantiationOrSpecialization*/false);
if (!Previous.empty()) {
Redeclaration = true;
OldDecl = Previous.getAsSingle<TypeAliasTemplateDecl>();
if (!OldDecl && !Invalid) {
Diag(UsingLoc, diag::err_redefinition_different_kind)
<< Name.Identifier;
NamedDecl *OldD = Previous.getRepresentativeDecl();
if (OldD->getLocation().isValid())
Diag(OldD->getLocation(), diag::note_previous_definition);
Invalid = true;
}
if (!Invalid && OldDecl && !OldDecl->isInvalidDecl()) {
if (TemplateParameterListsAreEqual(TemplateParams,
OldDecl->getTemplateParameters(),
/*Complain=*/true,
TPL_TemplateMatch))
OldTemplateParams = OldDecl->getTemplateParameters();
else
Invalid = true;
TypeAliasDecl *OldTD = OldDecl->getTemplatedDecl();
if (!Invalid &&
!Context.hasSameType(OldTD->getUnderlyingType(),
NewTD->getUnderlyingType())) {
// FIXME: The C++0x standard does not clearly say this is ill-formed,
// but we can't reasonably accept it.
Diag(NewTD->getLocation(), diag::err_redefinition_different_typedef)
<< 2 << NewTD->getUnderlyingType() << OldTD->getUnderlyingType();
if (OldTD->getLocation().isValid())
Diag(OldTD->getLocation(), diag::note_previous_definition);
Invalid = true;
}
}
}
// Merge any previous default template arguments into our parameters,
// and check the parameter list.
if (CheckTemplateParameterList(TemplateParams, OldTemplateParams,
TPC_TypeAliasTemplate))
return nullptr;
TypeAliasTemplateDecl *NewDecl =
TypeAliasTemplateDecl::Create(Context, CurContext, UsingLoc,
Name.Identifier, TemplateParams,
NewTD);
NewTD->setDescribedAliasTemplate(NewDecl);
NewDecl->setAccess(AS);
if (Invalid)
NewDecl->setInvalidDecl();
else if (OldDecl)
NewDecl->setPreviousDecl(OldDecl);
NewND = NewDecl;
} else {
if (auto *TD = dyn_cast_or_null<TagDecl>(DeclFromDeclSpec)) {
setTagNameForLinkagePurposes(TD, NewTD);
handleTagNumbering(TD, S);
}
ActOnTypedefNameDecl(S, CurContext, NewTD, Previous, Redeclaration);
NewND = NewTD;
}
PushOnScopeChains(NewND, S);
ActOnDocumentableDecl(NewND);
return NewND;
}
Decl *Sema::ActOnNamespaceAliasDef(Scope *S, SourceLocation NamespaceLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias, CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *Ident) {
// Lookup the namespace name.
LookupResult R(*this, Ident, IdentLoc, LookupNamespaceName);
LookupParsedName(R, S, &SS);
if (R.isAmbiguous())
return nullptr;
if (R.empty()) {
if (!TryNamespaceTypoCorrection(*this, R, S, SS, IdentLoc, Ident)) {
Diag(IdentLoc, diag::err_expected_namespace_name) << SS.getRange();
return nullptr;
}
}
assert(!R.isAmbiguous() && !R.empty());
NamedDecl *ND = R.getRepresentativeDecl();
// Check if we have a previous declaration with the same name.
LookupResult PrevR(*this, Alias, AliasLoc, LookupOrdinaryName,
ForRedeclaration);
LookupName(PrevR, S);
// Check we're not shadowing a template parameter.
if (PrevR.isSingleResult() && PrevR.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(AliasLoc, PrevR.getFoundDecl());
PrevR.clear();
}
// Filter out any other lookup result from an enclosing scope.
FilterLookupForScope(PrevR, CurContext, S, /*ConsiderLinkage*/false,
/*AllowInlineNamespace*/false);
// Find the previous declaration and check that we can redeclare it.
NamespaceAliasDecl *Prev = nullptr;
if (PrevR.isSingleResult()) {
NamedDecl *PrevDecl = PrevR.getRepresentativeDecl();
if (NamespaceAliasDecl *AD = dyn_cast<NamespaceAliasDecl>(PrevDecl)) {
// We already have an alias with the same name that points to the same
// namespace; check that it matches.
if (AD->getNamespace()->Equals(getNamespaceDecl(ND))) {
Prev = AD;
} else if (isVisible(PrevDecl)) {
Diag(AliasLoc, diag::err_redefinition_different_namespace_alias)
<< Alias;
Diag(AD->getLocation(), diag::note_previous_namespace_alias)
<< AD->getNamespace();
return nullptr;
}
} else if (isVisible(PrevDecl)) {
unsigned DiagID = isa<NamespaceDecl>(PrevDecl->getUnderlyingDecl())
? diag::err_redefinition
: diag::err_redefinition_different_kind;
Diag(AliasLoc, DiagID) << Alias;
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
return nullptr;
}
}
// The use of a nested name specifier may trigger deprecation warnings.
DiagnoseUseOfDecl(ND, IdentLoc);
NamespaceAliasDecl *AliasDecl =
NamespaceAliasDecl::Create(Context, CurContext, NamespaceLoc, AliasLoc,
Alias, SS.getWithLocInContext(Context),
IdentLoc, ND);
if (Prev)
AliasDecl->setPreviousDecl(Prev);
PushOnScopeChains(AliasDecl, S);
return AliasDecl;
}
namespace {
struct SpecialMemberExceptionSpecInfo
: SpecialMemberVisitor<SpecialMemberExceptionSpecInfo> {
SourceLocation Loc;
Sema::ImplicitExceptionSpecification ExceptSpec;
SpecialMemberExceptionSpecInfo(Sema &S, CXXMethodDecl *MD,
Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI,
SourceLocation Loc)
: SpecialMemberVisitor(S, MD, CSM, ICI), Loc(Loc), ExceptSpec(S) {}
bool visitBase(CXXBaseSpecifier *Base);
bool visitField(FieldDecl *FD);
void visitClassSubobject(CXXRecordDecl *Class, Subobject Subobj,
unsigned Quals);
void visitSubobjectCall(Subobject Subobj,
Sema::SpecialMemberOverloadResult SMOR);
};
}
bool SpecialMemberExceptionSpecInfo::visitBase(CXXBaseSpecifier *Base) {
auto *RT = Base->getType()->getAs<RecordType>();
if (!RT)
return false;
auto *BaseClass = cast<CXXRecordDecl>(RT->getDecl());
Sema::SpecialMemberOverloadResult SMOR = lookupInheritedCtor(BaseClass);
if (auto *BaseCtor = SMOR.getMethod()) {
visitSubobjectCall(Base, BaseCtor);
return false;
}
visitClassSubobject(BaseClass, Base, 0);
return false;
}
bool SpecialMemberExceptionSpecInfo::visitField(FieldDecl *FD) {
if (CSM == Sema::CXXDefaultConstructor && FD->hasInClassInitializer()) {
Expr *E = FD->getInClassInitializer();
if (!E)
// FIXME: It's a little wasteful to build and throw away a
// CXXDefaultInitExpr here.
// FIXME: We should have a single context note pointing at Loc, and
// this location should be MD->getLocation() instead, since that's
// the location where we actually use the default init expression.
E = S.BuildCXXDefaultInitExpr(Loc, FD).get();
if (E)
ExceptSpec.CalledExpr(E);
} else if (auto *RT = S.Context.getBaseElementType(FD->getType())
->getAs<RecordType>()) {
visitClassSubobject(cast<CXXRecordDecl>(RT->getDecl()), FD,
FD->getType().getCVRQualifiers());
}
return false;
}
void SpecialMemberExceptionSpecInfo::visitClassSubobject(CXXRecordDecl *Class,
Subobject Subobj,
unsigned Quals) {
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
bool IsMutable = Field && Field->isMutable();
visitSubobjectCall(Subobj, lookupIn(Class, Quals, IsMutable));
}
void SpecialMemberExceptionSpecInfo::visitSubobjectCall(
Subobject Subobj, Sema::SpecialMemberOverloadResult SMOR) {
// Note, if lookup fails, it doesn't matter what exception specification we
// choose because the special member will be deleted.
if (CXXMethodDecl *MD = SMOR.getMethod())
ExceptSpec.CalledDecl(getSubobjectLoc(Subobj), MD);
}
static Sema::ImplicitExceptionSpecification
ComputeDefaultedSpecialMemberExceptionSpec(
Sema &S, SourceLocation Loc, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI) {
CXXRecordDecl *ClassDecl = MD->getParent();
// C++ [except.spec]p14:
// An implicitly declared special member function (Clause 12) shall have an
// exception-specification. [...]
SpecialMemberExceptionSpecInfo Info(S, MD, CSM, ICI, Loc);
if (ClassDecl->isInvalidDecl())
return Info.ExceptSpec;
// C++1z [except.spec]p7:
// [Look for exceptions thrown by] a constructor selected [...] to
// initialize a potentially constructed subobject,
// C++1z [except.spec]p8:
// The exception specification for an implicitly-declared destructor, or a
// destructor without a noexcept-specifier, is potentially-throwing if and
// only if any of the destructors for any of its potentially constructed
// subobjects is potentially throwing.
// FIXME: We respect the first rule but ignore the "potentially constructed"
// in the second rule to resolve a core issue (no number yet) that would have
// us reject:
// struct A { virtual void f() = 0; virtual ~A() noexcept(false) = 0; };
// struct B : A {};
// struct C : B { void f(); };
// ... due to giving B::~B() a non-throwing exception specification.
Info.visit(Info.IsConstructor ? Info.VisitPotentiallyConstructedBases
: Info.VisitAllBases);
return Info.ExceptSpec;
}
namespace {
/// RAII object to register a special member as being currently declared.
struct DeclaringSpecialMember {
Sema &S;
Sema::SpecialMemberDecl D;
Sema::ContextRAII SavedContext;
bool WasAlreadyBeingDeclared;
DeclaringSpecialMember(Sema &S, CXXRecordDecl *RD, Sema::CXXSpecialMember CSM)
: S(S), D(RD, CSM), SavedContext(S, RD) {
WasAlreadyBeingDeclared = !S.SpecialMembersBeingDeclared.insert(D).second;
if (WasAlreadyBeingDeclared)
// This almost never happens, but if it does, ensure that our cache
// doesn't contain a stale result.
S.SpecialMemberCache.clear();
else {
// Register a note to be produced if we encounter an error while
// declaring the special member.
Sema::CodeSynthesisContext Ctx;
Ctx.Kind = Sema::CodeSynthesisContext::DeclaringSpecialMember;
// FIXME: We don't have a location to use here. Using the class's
// location maintains the fiction that we declare all special members
// with the class, but (1) it's not clear that lying about that helps our
// users understand what's going on, and (2) there may be outer contexts
// on the stack (some of which are relevant) and printing them exposes
// our lies.
Ctx.PointOfInstantiation = RD->getLocation();
Ctx.Entity = RD;
Ctx.SpecialMember = CSM;
S.pushCodeSynthesisContext(Ctx);
}
}
~DeclaringSpecialMember() {
if (!WasAlreadyBeingDeclared) {
S.SpecialMembersBeingDeclared.erase(D);
S.popCodeSynthesisContext();
}
}
/// \brief Are we already trying to declare this special member?
bool isAlreadyBeingDeclared() const {
return WasAlreadyBeingDeclared;
}
};
}
void Sema::CheckImplicitSpecialMemberDeclaration(Scope *S, FunctionDecl *FD) {
// Look up any existing declarations, but don't trigger declaration of all
// implicit special members with this name.
DeclarationName Name = FD->getDeclName();
LookupResult R(*this, Name, SourceLocation(), LookupOrdinaryName,
ForRedeclaration);
for (auto *D : FD->getParent()->lookup(Name))
if (auto *Acceptable = R.getAcceptableDecl(D))
R.addDecl(Acceptable);
R.resolveKind();
R.suppressDiagnostics();
CheckFunctionDeclaration(S, FD, R, /*IsMemberSpecialization*/false);
}
CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
CXXRecordDecl *ClassDecl) {
// C++ [class.ctor]p5:
// A default constructor for a class X is a constructor of class X
// that can be called without an argument. If there is no
// user-declared constructor for class X, a default constructor is
// implicitly declared. An implicitly-declared default constructor
// is an inline public member of its class.
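// For example: 'struct S { int i; };' declares no constructor, so an
// implicit inline public 'S()' is declared through this path.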
assert(ClassDecl->needsImplicitDefaultConstructor() &&
"Should not build implicit default constructor!");
DeclaringSpecialMember DSM(*this, ClassDecl, CXXDefaultConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXDefaultConstructor,
false);
// Create the actual constructor declaration.
CanQualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(ClassType);
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXConstructorDecl *DefaultCon = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, /*Type*/QualType(),
/*TInfo=*/nullptr, /*isExplicit=*/false, /*isInline=*/true,
/*isImplicitlyDeclared=*/true, Constexpr);
DefaultCon->setAccess(AS_public);
DefaultCon->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDefaultConstructor,
DefaultCon,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this constructor.
FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, DefaultCon);
DefaultCon->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// We don't need to use SpecialMemberIsTrivial here; triviality for default
// constructors is easy to compute.
DefaultCon->setTrivial(ClassDecl->hasTrivialDefaultConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitDefaultConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, DefaultCon);
if (ShouldDeleteSpecialMember(DefaultCon, CXXDefaultConstructor))
SetDeclDeleted(DefaultCon, ClassLoc);
if (S)
PushOnScopeChains(DefaultCon, S, false);
ClassDecl->addDecl(DefaultCon);
return DefaultCon;
}
void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *Constructor) {
assert((Constructor->isDefaulted() && Constructor->isDefaultConstructor() &&
!Constructor->doesThisDeclarationHaveABody() &&
!Constructor->isDeleted()) &&
"DefineImplicitDefaultConstructor - call it for implicit default ctor");
if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = Constructor->getParent();
assert(ClassDecl && "DefineImplicitDefaultConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, Constructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Constructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
if (SetCtorInitializers(Constructor, /*AnyErrors=*/false)) {
Constructor->setInvalidDecl();
return;
}
SourceLocation Loc = Constructor->getLocEnd().isValid()
? Constructor->getLocEnd()
: Constructor->getLocation();
Constructor->setBody(new (Context) CompoundStmt(Loc));
Constructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Constructor);
}
DiagnoseUninitializedFields(*this, Constructor);
}
void Sema::ActOnFinishDelayedMemberInitializers(Decl *D) {
// Perform any delayed checks on exception specifications.
CheckDelayedMemberExceptionSpecs();
}
/// Find or create the fake constructor we synthesize to model constructing an
/// object of a derived class via a constructor of a base class.
CXXConstructorDecl *
Sema::findInheritingConstructor(SourceLocation Loc,
CXXConstructorDecl *BaseCtor,
ConstructorUsingShadowDecl *Shadow) {
CXXRecordDecl *Derived = Shadow->getParent();
SourceLocation UsingLoc = Shadow->getLocation();
// FIXME: Add a new kind of DeclarationName for an inherited constructor.
// For now we use the name of the base class constructor as a member of the
// derived class to indicate a (fake) inherited constructor name.
DeclarationName Name = BaseCtor->getDeclName();
// Check to see if we already have a fake constructor for this inherited
// constructor call.
for (NamedDecl *Ctor : Derived->lookup(Name))
if (declaresSameEntity(cast<CXXConstructorDecl>(Ctor)
->getInheritedConstructor()
.getConstructor(),
BaseCtor))
return cast<CXXConstructorDecl>(Ctor);
DeclarationNameInfo NameInfo(Name, UsingLoc);
TypeSourceInfo *TInfo =
Context.getTrivialTypeSourceInfo(BaseCtor->getType(), UsingLoc);
FunctionProtoTypeLoc ProtoLoc =
TInfo->getTypeLoc().IgnoreParens().castAs<FunctionProtoTypeLoc>();
// Check the inherited constructor is valid and find the list of base classes
// from which it was inherited.
InheritedConstructorInfo ICI(*this, Loc, Shadow);
bool Constexpr =
BaseCtor->isConstexpr() &&
defaultedSpecialMemberIsConstexpr(*this, Derived, CXXDefaultConstructor,
false, BaseCtor, &ICI);
CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo,
BaseCtor->isExplicit(), /*Inline=*/true,
/*ImplicitlyDeclared=*/true, Constexpr,
InheritedConstructor(Shadow, BaseCtor));
if (Shadow->isInvalidDecl())
DerivedCtor->setInvalidDecl();
// Build an unevaluated exception specification for this fake constructor.
const FunctionProtoType *FPT = TInfo->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = DerivedCtor;
DerivedCtor->setType(Context.getFunctionType(FPT->getReturnType(),
FPT->getParamTypes(), EPI));
// Build the parameter declarations.
SmallVector<ParmVarDecl *, 16> ParamDecls;
for (unsigned I = 0, N = FPT->getNumParams(); I != N; ++I) {
TypeSourceInfo *TInfo =
Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc);
ParmVarDecl *PD = ParmVarDecl::Create(
Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr,
FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr);
PD->setScopeInfo(0, I);
PD->setImplicit();
// Ensure attributes are propagated onto parameters (this matters for
// format, pass_object_size, ...).
mergeDeclAttributes(PD, BaseCtor->getParamDecl(I));
ParamDecls.push_back(PD);
ProtoLoc.setParam(I, PD);
}
// Set up the new constructor.
assert(!BaseCtor->isDeleted() && "should not use deleted constructor");
DerivedCtor->setAccess(BaseCtor->getAccess());
DerivedCtor->setParams(ParamDecls);
Derived->addDecl(DerivedCtor);
if (ShouldDeleteSpecialMember(DerivedCtor, CXXDefaultConstructor, &ICI))
SetDeclDeleted(DerivedCtor, UsingLoc);
return DerivedCtor;
}
void Sema::NoteDeletedInheritingConstructor(CXXConstructorDecl *Ctor) {
InheritedConstructorInfo ICI(*this, Ctor->getLocation(),
Ctor->getInheritedConstructor().getShadowDecl());
ShouldDeleteSpecialMember(Ctor, CXXDefaultConstructor, &ICI,
/*Diagnose*/true);
}
void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *Constructor) {
CXXRecordDecl *ClassDecl = Constructor->getParent();
assert(Constructor->getInheritedConstructor() &&
!Constructor->doesThisDeclarationHaveABody() &&
!Constructor->isDeleted());
if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
return;
// Initializations are performed "as if by a defaulted default constructor",
// so enter the appropriate scope.
SynthesizedFunctionScope Scope(*this, Constructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Constructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
ConstructorUsingShadowDecl *Shadow =
Constructor->getInheritedConstructor().getShadowDecl();
CXXConstructorDecl *InheritedCtor =
Constructor->getInheritedConstructor().getConstructor();
// [class.inhctor.init]p1:
// initialization proceeds as if a defaulted default constructor is used to
// initialize the D object and each base class subobject from which the
// constructor was inherited
InheritedConstructorInfo ICI(*this, CurrentLocation, Shadow);
CXXRecordDecl *RD = Shadow->getParent();
SourceLocation InitLoc = Shadow->getLocation();
// Build explicit initializers for all base classes from which the
// constructor was inherited.
SmallVector<CXXCtorInitializer*, 8> Inits;
for (bool VBase : {false, true}) {
for (CXXBaseSpecifier &B : VBase ? RD->vbases() : RD->bases()) {
if (B.isVirtual() != VBase)
continue;
auto *BaseRD = B.getType()->getAsCXXRecordDecl();
if (!BaseRD)
continue;
auto BaseCtor = ICI.findConstructorForBase(BaseRD, InheritedCtor);
if (!BaseCtor.first)
continue;
MarkFunctionReferenced(CurrentLocation, BaseCtor.first);
ExprResult Init = new (Context) CXXInheritedCtorInitExpr(
InitLoc, B.getType(), BaseCtor.first, VBase, BaseCtor.second);
auto *TInfo = Context.getTrivialTypeSourceInfo(B.getType(), InitLoc);
Inits.push_back(new (Context) CXXCtorInitializer(
Context, TInfo, VBase, InitLoc, Init.get(), InitLoc,
SourceLocation()));
}
}
// We now proceed as if for a defaulted default constructor, with the relevant
// initializers replaced.
if (SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits)) {
Constructor->setInvalidDecl();
return;
}
Constructor->setBody(new (Context) CompoundStmt(InitLoc));
Constructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Constructor);
}
DiagnoseUninitializedFields(*this, Constructor);
}
CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
// C++ [class.dtor]p2:
// If a class has no user-declared destructor, a destructor is
// declared implicitly. An implicitly-declared destructor is an
// inline public member of its class.
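// For example: a class 'struct S {};' with no user-declared destructor
// receives an implicitly-declared inline public '~S()' here.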
assert(ClassDecl->needsImplicitDestructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXDestructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
// Create the actual destructor declaration.
CanQualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationName Name
= Context.DeclarationNames.getCXXDestructorName(ClassType);
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXDestructorDecl *Destructor
= CXXDestructorDecl::Create(Context, ClassDecl, ClassLoc, NameInfo,
QualType(), nullptr, /*isInline=*/true,
/*isImplicitlyDeclared=*/true);
Destructor->setAccess(AS_public);
Destructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDestructor,
Destructor,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this destructor.
FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, Destructor);
Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// We don't need to use SpecialMemberIsTrivial here; triviality for
// destructors is easy to compute.
Destructor->setTrivial(ClassDecl->hasTrivialDestructor());
// Note that we have declared this destructor.
++ASTContext::NumImplicitDestructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, Destructor);
// We can't check whether an implicit destructor is deleted before we complete
// the definition of the class, because its validity depends on the alignment
// of the class. We'll check this from ActOnFields once the class is complete.
if (ClassDecl->isCompleteDefinition() &&
ShouldDeleteSpecialMember(Destructor, CXXDestructor))
SetDeclDeleted(Destructor, ClassLoc);
// Introduce this destructor into its scope.
if (S)
PushOnScopeChains(Destructor, S, false);
ClassDecl->addDecl(Destructor);
return Destructor;
}
void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation,
CXXDestructorDecl *Destructor) {
assert((Destructor->isDefaulted() &&
!Destructor->doesThisDeclarationHaveABody() &&
!Destructor->isDeleted()) &&
"DefineImplicitDestructor - call it for implicit default dtor");
if (Destructor->willHaveBody() || Destructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = Destructor->getParent();
assert(ClassDecl && "DefineImplicitDestructor - invalid destructor");
SynthesizedFunctionScope Scope(*this, Destructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Destructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
Destructor->getParent());
if (CheckDestructor(Destructor)) {
Destructor->setInvalidDecl();
return;
}
SourceLocation Loc = Destructor->getLocEnd().isValid()
? Destructor->getLocEnd()
: Destructor->getLocation();
Destructor->setBody(new (Context) CompoundStmt(Loc));
Destructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Destructor);
}
}
/// \brief Perform any semantic analysis which needs to be delayed until all
/// pending class member declarations have been parsed.
void Sema::ActOnFinishCXXMemberDecls() {
// If the context is an invalid C++ class, just suppress these checks.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(CurContext)) {
if (Record->isInvalidDecl()) {
DelayedDefaultedMemberExceptionSpecs.clear();
DelayedExceptionSpecChecks.clear();
return;
}
checkForMultipleExportedDefaultConstructors(*this, Record);
}
}
void Sema::ActOnFinishCXXNonNestedClass(Decl *D) {
referenceDLLExportedClassMethods();
}
void Sema::referenceDLLExportedClassMethods() {
if (!DelayedDllExportClasses.empty()) {
// Calling ReferenceDllExportedMethods might cause the current function to
// be called again, so use a local copy of DelayedDllExportClasses.
SmallVector<CXXRecordDecl *, 4> WorkList;
std::swap(DelayedDllExportClasses, WorkList);
for (CXXRecordDecl *Class : WorkList)
ReferenceDllExportedMethods(*this, Class);
}
}
void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *ClassDecl,
CXXDestructorDecl *Destructor) {
assert(getLangOpts().CPlusPlus11 &&
"adjusting dtor exception specs was introduced in c++11");
// C++11 [class.dtor]p3:
// A declaration of a destructor that does not have an exception-
// specification is implicitly considered to have the same exception-
// specification as an implicit declaration.
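// For example: in 'struct S { ~S(); };' the destructor is declared without
// an exception specification, so in C++11 it receives the one its implicit
// counterpart would have (here, non-throwing).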
const FunctionProtoType *DtorType = Destructor->getType()->
getAs<FunctionProtoType>();
if (DtorType->hasExceptionSpec())
return;
// Replace the destructor's type, building off the existing one. Fortunately,
// the only thing of interest in the destructor type is its extended info.
// The return and arguments are fixed.
FunctionProtoType::ExtProtoInfo EPI = DtorType->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = Destructor;
Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// FIXME: If the destructor has a body that could throw, and the newly created
// spec doesn't allow exceptions, we should emit a warning, because this
// change in behavior can break conforming C++03 programs at runtime.
// However, we don't have a body or an exception specification yet, so it
// needs to be done somewhere else.
}
namespace {
/// \brief An abstract base class for all helper classes used in building the
/// copy/move operators. These classes serve as factory functions and help us
/// avoid using the same Expr* in the AST twice.
class ExprBuilder {
ExprBuilder(const ExprBuilder&) = delete;
ExprBuilder &operator=(const ExprBuilder&) = delete;
protected:
static Expr *assertNotNull(Expr *E) {
assert(E && "Expression construction must not fail.");
return E;
}
public:
ExprBuilder() {}
virtual ~ExprBuilder() {}
virtual Expr *build(Sema &S, SourceLocation Loc) const = 0;
};
class RefBuilder: public ExprBuilder {
VarDecl *Var;
QualType VarType;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.BuildDeclRefExpr(Var, VarType, VK_LValue, Loc).get());
}
RefBuilder(VarDecl *Var, QualType VarType)
: Var(Var), VarType(VarType) {}
};
class ThisBuilder: public ExprBuilder {
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.ActOnCXXThis(Loc).getAs<Expr>());
}
};
class CastBuilder: public ExprBuilder {
const ExprBuilder &Builder;
QualType Type;
ExprValueKind Kind;
const CXXCastPath &Path;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.ImpCastExprToType(Builder.build(S, Loc), Type,
CK_UncheckedDerivedToBase, Kind,
&Path).get());
}
CastBuilder(const ExprBuilder &Builder, QualType Type, ExprValueKind Kind,
const CXXCastPath &Path)
: Builder(Builder), Type(Type), Kind(Kind), Path(Path) {}
};
class DerefBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(
S.CreateBuiltinUnaryOp(Loc, UO_Deref, Builder.build(S, Loc)).get());
}
DerefBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class MemberBuilder: public ExprBuilder {
const ExprBuilder &Builder;
QualType Type;
CXXScopeSpec SS;
bool IsArrow;
LookupResult &MemberLookup;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.BuildMemberReferenceExpr(
Builder.build(S, Loc), Type, Loc, IsArrow, SS, SourceLocation(),
nullptr, MemberLookup, nullptr, nullptr).get());
}
MemberBuilder(const ExprBuilder &Builder, QualType Type, bool IsArrow,
LookupResult &MemberLookup)
: Builder(Builder), Type(Type), IsArrow(IsArrow),
MemberLookup(MemberLookup) {}
};
class MoveCastBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(CastForMoving(S, Builder.build(S, Loc)));
}
MoveCastBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class LvalueConvBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(
S.DefaultLvalueConversion(Builder.build(S, Loc)).get());
}
LvalueConvBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class SubscriptBuilder: public ExprBuilder {
const ExprBuilder &Base;
const ExprBuilder &Index;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.CreateBuiltinArraySubscriptExpr(
Base.build(S, Loc), Loc, Index.build(S, Loc), Loc).get());
}
SubscriptBuilder(const ExprBuilder &Base, const ExprBuilder &Index)
: Base(Base), Index(Index) {}
};
} // end anonymous namespace
/// When generating a defaulted copy or move assignment operator, if a field
/// should be copied with __builtin_memcpy rather than via explicit assignments,
/// do so. This optimization only applies for arrays of scalars, and for arrays
/// of class type where the selected copy/move-assignment operator is trivial.
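///
/// For example (illustrative): a field 'int a[4];' is copied with a single
/// __builtin_memcpy of sizeof(int[4]) bytes instead of four element-wise
/// assignments.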
static StmtResult
buildMemcpyForAssignmentOp(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &ToB, const ExprBuilder &FromB) {
// Compute the size of the memory buffer to be copied.
QualType SizeType = S.Context.getSizeType();
llvm::APInt Size(S.Context.getTypeSize(SizeType),
S.Context.getTypeSizeInChars(T).getQuantity());
// Take the address of the field references for "from" and "to". We
// directly construct UnaryOperators here because semantic analysis
// does not permit us to take the address of an xvalue.
Expr *From = FromB.build(S, Loc);
From = new (S.Context) UnaryOperator(From, UO_AddrOf,
S.Context.getPointerType(From->getType()),
VK_RValue, OK_Ordinary, Loc);
Expr *To = ToB.build(S, Loc);
To = new (S.Context) UnaryOperator(To, UO_AddrOf,
S.Context.getPointerType(To->getType()),
VK_RValue, OK_Ordinary, Loc);
const Type *E = T->getBaseElementTypeUnsafe();
bool NeedsCollectableMemCpy =
E->isRecordType() && E->getAs<RecordType>()->getDecl()->hasObjectMember();
// Create a reference to the __builtin_objc_memmove_collectable function
StringRef MemCpyName = NeedsCollectableMemCpy ?
"__builtin_objc_memmove_collectable" :
"__builtin_memcpy";
LookupResult R(S, &S.Context.Idents.get(MemCpyName), Loc,
Sema::LookupOrdinaryName);
S.LookupName(R, S.TUScope, true);
FunctionDecl *MemCpy = R.getAsSingle<FunctionDecl>();
if (!MemCpy)
// Something went horribly wrong earlier, and we will have complained
// about it.
return StmtError();
ExprResult MemCpyRef = S.BuildDeclRefExpr(MemCpy, S.Context.BuiltinFnTy,
VK_RValue, Loc, nullptr);
assert(MemCpyRef.isUsable() && "Builtin reference cannot fail");
Expr *CallArgs[] = {
To, From, IntegerLiteral::Create(S.Context, Size, SizeType, Loc)
};
ExprResult Call = S.ActOnCallExpr(/*Scope=*/nullptr, MemCpyRef.get(),
Loc, CallArgs, Loc);
assert(!Call.isInvalid() && "Call to __builtin_memcpy cannot fail!");
return Call.getAs<Stmt>();
}
/// \brief Builds a statement that copies/moves the given entity from \p From
/// to \p To.
///
/// This routine is used to copy/move the members of a class with an
/// implicitly-declared copy/move assignment operator. When the entities being
/// copied are arrays, this routine builds for loops to copy them.
///
/// \param S The Sema object used for type-checking.
///
/// \param Loc The location where the implicit copy/move is being generated.
///
/// \param T The type of the expressions being copied/moved. Both expressions
/// must have this type.
///
/// \param To The expression we are copying/moving to.
///
/// \param From The expression we are copying/moving from.
///
/// \param CopyingBaseSubobject Whether we're copying/moving a base subobject.
/// Otherwise, it's a non-static member subobject.
///
/// \param Copying Whether we're copying or moving.
///
/// \param Depth Internal parameter recording the depth of the recursion.
///
/// \returns A statement or a loop that copies the expressions, or StmtResult(0)
/// if a memcpy should be used instead.
static StmtResult
buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &To, const ExprBuilder &From,
bool CopyingBaseSubobject, bool Copying,
unsigned Depth = 0) {
// C++11 [class.copy]p28:
// Each subobject is assigned in the manner appropriate to its type:
//
// - if the subobject is of class type, as if by a call to operator= with
// the subobject as the object expression and the corresponding
// subobject of x as a single function argument (as if by explicit
// qualification; that is, ignoring any possible virtual overriding
// functions in more derived classes);
//
// C++03 [class.copy]p13:
// - if the subobject is of class type, the copy assignment operator for
// the class is used (as if by explicit qualification; that is,
// ignoring any possible virtual overriding functions in more derived
// classes);
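// For example (illustrative): even if a base B declares
// 'virtual B &operator=(const B &);', the synthesized assignment calls
// 'B::operator=' directly through this qualification, never an override in
// a more derived class.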
if (const RecordType *RecordTy = T->getAs<RecordType>()) {
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
// Look for operator=.
DeclarationName Name
= S.Context.DeclarationNames.getCXXOperatorName(OO_Equal);
LookupResult OpLookup(S, Name, Loc, Sema::LookupOrdinaryName);
S.LookupQualifiedName(OpLookup, ClassDecl, false);
// Prior to C++11, filter out any result that isn't a copy/move-assignment
// operator.
if (!S.getLangOpts().CPlusPlus11) {
LookupResult::Filter F = OpLookup.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D))
if (Method->isCopyAssignmentOperator() ||
(!Copying && Method->isMoveAssignmentOperator()))
continue;
F.erase();
}
F.done();
}
// Suppress the protected check (C++ [class.protected]) for each of the
// assignment operators we found. This strange dance is required when
// we're assigning via a base class's copy-assignment operator. To
// ensure that we're getting the right base class subobject (without
// ambiguities), we need to cast "this" to that subobject type; to
// ensure that we don't go through the virtual call mechanism, we need
// to qualify the operator= name with the base class (see below). However,
// this means that if the base class has a protected copy assignment
// operator, the protected member access check will fail. So, we
// rewrite "protected" access to "public" access in this case, since we
// know by construction that we're calling from a derived class.
if (CopyingBaseSubobject) {
for (LookupResult::iterator L = OpLookup.begin(), LEnd = OpLookup.end();
L != LEnd; ++L) {
if (L.getAccess() == AS_protected)
L.setAccess(AS_public);
}
}
// Create the nested-name-specifier that will be used to qualify the
// reference to operator=; this is required to suppress the virtual
// call mechanism.
CXXScopeSpec SS;
const Type *CanonicalT = S.Context.getCanonicalType(T.getTypePtr());
SS.MakeTrivial(S.Context,
NestedNameSpecifier::Create(S.Context, nullptr, false,
CanonicalT),
Loc);
// Create the reference to operator=.
ExprResult OpEqualRef
= S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*isArrow=*/false,
SS, /*TemplateKWLoc=*/SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
OpLookup,
/*TemplateArgs=*/nullptr, /*S*/nullptr,
/*SuppressQualifierCheck=*/true);
if (OpEqualRef.isInvalid())
return StmtError();
// Build the call to the assignment operator.
Expr *FromInst = From.build(S, Loc);
ExprResult Call = S.BuildCallToMemberFunction(/*Scope=*/nullptr,
OpEqualRef.getAs<Expr>(),
Loc, FromInst, Loc);
if (Call.isInvalid())
return StmtError();
// If we built a call to a trivial 'operator=' while copying an array,
// bail out. We'll replace the whole shebang with a memcpy.
CXXMemberCallExpr *CE = dyn_cast<CXXMemberCallExpr>(Call.get());
if (CE && CE->getMethodDecl()->isTrivial() && Depth)
return StmtResult((Stmt*)nullptr);
// Convert to an expression-statement, and clean up any produced
// temporaries.
return S.ActOnExprStmt(Call);
}
// - if the subobject is of scalar type, the built-in assignment
// operator is used.
const ConstantArrayType *ArrayTy = S.Context.getAsConstantArrayType(T);
if (!ArrayTy) {
ExprResult Assignment = S.CreateBuiltinBinOp(
Loc, BO_Assign, To.build(S, Loc), From.build(S, Loc));
if (Assignment.isInvalid())
return StmtError();
return S.ActOnExprStmt(Assignment);
}
// - if the subobject is an array, each element is assigned, in the
// manner appropriate to the element type;
// Construct a loop over the array bounds, e.g.,
//
// for (__SIZE_TYPE__ i0 = 0; i0 != array-size; ++i0)
//
// that will copy each of the array elements.
QualType SizeType = S.Context.getSizeType();
// Create the iteration variable.
IdentifierInfo *IterationVarName = nullptr;
{
SmallString<8> Str;
llvm::raw_svector_ostream OS(Str);
OS << "__i" << Depth;
IterationVarName = &S.Context.Idents.get(OS.str());
}
VarDecl *IterationVar = VarDecl::Create(S.Context, S.CurContext, Loc, Loc,
IterationVarName, SizeType,
S.Context.getTrivialTypeSourceInfo(SizeType, Loc),
SC_None);
// Initialize the iteration variable to zero.
llvm::APInt Zero(S.Context.getTypeSize(SizeType), 0);
IterationVar->setInit(IntegerLiteral::Create(S.Context, Zero, SizeType, Loc));
// Create a reference to the iteration variable.
RefBuilder IterationVarRef(IterationVar, SizeType);
LvalueConvBuilder IterationVarRefRVal(IterationVarRef);
// Create the DeclStmt that holds the iteration variable.
Stmt *InitStmt = new (S.Context) DeclStmt(DeclGroupRef(IterationVar),Loc,Loc);
// Subscript the "from" and "to" expressions with the iteration variable.
SubscriptBuilder FromIndexCopy(From, IterationVarRefRVal);
MoveCastBuilder FromIndexMove(FromIndexCopy);
const ExprBuilder *FromIndex;
if (Copying)
FromIndex = &FromIndexCopy;
else
FromIndex = &FromIndexMove;
SubscriptBuilder ToIndex(To, IterationVarRefRVal);
// Build the copy/move for an individual element of the array.
StmtResult Copy =
buildSingleCopyAssignRecursively(S, Loc, ArrayTy->getElementType(),
ToIndex, *FromIndex, CopyingBaseSubobject,
Copying, Depth + 1);
// Bail out if copying fails or if we determined that we should use memcpy.
if (Copy.isInvalid() || !Copy.get())
return Copy;
// Create the comparison against the array bound.
llvm::APInt Upper
= ArrayTy->getSize().zextOrTrunc(S.Context.getTypeSize(SizeType));
Expr *Comparison
= new (S.Context) BinaryOperator(IterationVarRefRVal.build(S, Loc),
IntegerLiteral::Create(S.Context, Upper, SizeType, Loc),
BO_NE, S.Context.BoolTy,
VK_RValue, OK_Ordinary, Loc, FPOptions());
// Create the pre-increment of the iteration variable.
Expr *Increment
= new (S.Context) UnaryOperator(IterationVarRef.build(S, Loc), UO_PreInc,
SizeType, VK_LValue, OK_Ordinary, Loc);
// Construct the loop that copies all elements of this array.
return S.ActOnForStmt(
Loc, Loc, InitStmt,
S.ActOnCondition(nullptr, Loc, Comparison, Sema::ConditionKind::Boolean),
S.MakeFullDiscardedValueExpr(Increment), Loc, Copy.get());
}
static StmtResult
buildSingleCopyAssign(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &To, const ExprBuilder &From,
bool CopyingBaseSubobject, bool Copying) {
// Maybe we should use a memcpy?
if (T->isArrayType() && !T.isConstQualified() && !T.isVolatileQualified() &&
T.isTriviallyCopyableType(S.Context))
return buildMemcpyForAssignmentOp(S, Loc, T, To, From);
StmtResult Result(buildSingleCopyAssignRecursively(S, Loc, T, To, From,
CopyingBaseSubobject,
Copying, 0));
// If we ended up picking a trivial assignment operator for an array of a
// non-trivially-copyable class type, just emit a memcpy.
if (!Result.isInvalid() && !Result.get())
return buildMemcpyForAssignmentOp(S, Loc, T, To, From);
return Result;
}
CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
// Note: The following rules are largely analogous to the copy
// constructor rules. Note that virtual bases are not taken into account
// for determining the argument type of the operator. Note also that
// operators taking an object instead of a reference are allowed.
assert(ClassDecl->needsImplicitCopyAssignment());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXCopyAssignment);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ArgType = Context.getTypeDeclType(ClassDecl);
QualType RetType = Context.getLValueReferenceType(ArgType);
bool Const = ClassDecl->implicitCopyAssignmentHasConstParam();
if (Const)
ArgType = ArgType.withConst();
ArgType = Context.getLValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXCopyAssignment,
Const);
// An implicitly-declared copy assignment operator is an inline public
// member of its class.
DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXMethodDecl *CopyAssignment =
CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
/*isInline=*/true, Constexpr, SourceLocation());
CopyAssignment->setAccess(AS_public);
CopyAssignment->setDefaulted();
CopyAssignment->setImplicit();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXCopyAssignment,
CopyAssignment,
/* ConstRHS */ Const,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, CopyAssignment);
CopyAssignment->setType(Context.getFunctionType(RetType, ArgType, EPI));
// Add the parameter to the operator.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyAssignment,
ClassLoc, ClassLoc,
/*Id=*/nullptr, ArgType,
/*TInfo=*/nullptr, SC_None,
nullptr);
CopyAssignment->setParams(FromParam);
CopyAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForCopyAssignment()
? SpecialMemberIsTrivial(CopyAssignment, CXXCopyAssignment)
: ClassDecl->hasTrivialCopyAssignment());
// Note that we have added this copy-assignment operator.
++ASTContext::NumImplicitCopyAssignmentOperatorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, CopyAssignment);
if (ShouldDeleteSpecialMember(CopyAssignment, CXXCopyAssignment))
SetDeclDeleted(CopyAssignment, ClassLoc);
if (S)
PushOnScopeChains(CopyAssignment, S, false);
ClassDecl->addDecl(CopyAssignment);
return CopyAssignment;
}
/// Diagnose an implicit copy operation for a class which is odr-used, but
/// which is deprecated because the class has a user-declared copy constructor,
/// copy assignment operator, or destructor.
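///
/// For example: given 'struct S { ~S(); };', assigning one S to another uses
/// the implicit copy assignment operator, which is deprecated because S has
/// a user-declared destructor.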
static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) {
assert(CopyOp->isImplicit());
CXXRecordDecl *RD = CopyOp->getParent();
CXXMethodDecl *UserDeclaredOperation = nullptr;
// In Microsoft mode, assignment operations don't affect constructors and
// vice versa.
if (RD->hasUserDeclaredDestructor()) {
UserDeclaredOperation = RD->getDestructor();
} else if (!isa<CXXConstructorDecl>(CopyOp) &&
RD->hasUserDeclaredCopyConstructor() &&
!S.getLangOpts().MSVCCompat) {
// Find any user-declared copy constructor.
for (auto *I : RD->ctors()) {
if (I->isCopyConstructor()) {
UserDeclaredOperation = I;
break;
}
}
assert(UserDeclaredOperation);
} else if (isa<CXXConstructorDecl>(CopyOp) &&
RD->hasUserDeclaredCopyAssignment() &&
!S.getLangOpts().MSVCCompat) {
// Find any user-declared copy assignment operator.
for (auto *I : RD->methods()) {
if (I->isCopyAssignmentOperator()) {
UserDeclaredOperation = I;
break;
}
}
assert(UserDeclaredOperation);
}
if (UserDeclaredOperation) {
S.Diag(UserDeclaredOperation->getLocation(),
diag::warn_deprecated_copy_operation)
<< RD << /*copy assignment*/!isa<CXXConstructorDecl>(CopyOp)
<< /*destructor*/isa<CXXDestructorDecl>(UserDeclaredOperation);
}
}
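// An example (illustrative, not from the original source) that reaches the
// warning above:
//   struct A { ~A(); };        // user-declared destructor
//   void f(A a) { A b = a; }   // odr-uses A's implicit copy constructor,
//                              // which C++11 deprecates for such classes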
void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
CXXMethodDecl *CopyAssignOperator) {
assert((CopyAssignOperator->isDefaulted() &&
CopyAssignOperator->isOverloadedOperator() &&
CopyAssignOperator->getOverloadedOperator() == OO_Equal &&
!CopyAssignOperator->doesThisDeclarationHaveABody() &&
!CopyAssignOperator->isDeleted()) &&
"DefineImplicitCopyAssignment called for wrong function");
if (CopyAssignOperator->willHaveBody() || CopyAssignOperator->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = CopyAssignOperator->getParent();
if (ClassDecl->isInvalidDecl()) {
CopyAssignOperator->setInvalidDecl();
return;
}
SynthesizedFunctionScope Scope(*this, CopyAssignOperator);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
CopyAssignOperator->getType()->castAs<FunctionProtoType>());
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// C++11 [class.copy]p18:
// The [definition of an implicitly declared copy assignment operator] is
// deprecated if the class has a user-declared copy constructor or a
// user-declared destructor.
if (getLangOpts().CPlusPlus11 && CopyAssignOperator->isImplicit())
diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator);
// C++0x [class.copy]p30:
// The implicitly-defined or explicitly-defaulted copy assignment operator
// for a non-union class X performs memberwise copy assignment of its
// subobjects. The direct base classes of X are assigned first, in the
// order of their declaration in the base-specifier-list, and then the
// immediate non-static data members of X are assigned, in the order in
// which they were declared in the class definition.
// The statements that form the synthesized function body.
SmallVector<Stmt*, 8> Statements;
// The parameter for the "other" object, which we are copying from.
ParmVarDecl *Other = CopyAssignOperator->getParamDecl(0);
Qualifiers OtherQuals = Other->getType().getQualifiers();
QualType OtherRefType = Other->getType();
if (const LValueReferenceType *OtherRef
= OtherRefType->getAs<LValueReferenceType>()) {
OtherRefType = OtherRef->getPointeeType();
OtherQuals = OtherRefType.getQualifiers();
}
// Our location for everything implicitly-generated.
SourceLocation Loc = CopyAssignOperator->getLocEnd().isValid()
? CopyAssignOperator->getLocEnd()
: CopyAssignOperator->getLocation();
// Builds a DeclRefExpr for the "other" object.
RefBuilder OtherRef(Other, OtherRefType);
// Builds the "this" pointer.
ThisBuilder This;
// Assign base classes.
bool Invalid = false;
for (auto &Base : ClassDecl->bases()) {
// Form the assignment:
// static_cast<Base*>(this)->Base::operator=(static_cast<Base&>(other));
QualType BaseType = Base.getType().getUnqualifiedType();
if (!BaseType->isRecordType()) {
Invalid = true;
continue;
}
CXXCastPath BasePath;
BasePath.push_back(&Base);
// Construct the "from" expression, which is an implicit cast to the
// appropriately-qualified base type.
CastBuilder From(OtherRef, Context.getQualifiedType(BaseType, OtherQuals),
VK_LValue, BasePath);
// Dereference "this".
DerefBuilder DerefThis(This);
CastBuilder To(DerefThis,
Context.getCVRQualifiedType(
BaseType, CopyAssignOperator->getTypeQualifiers()),
VK_LValue, BasePath);
// Build the copy.
StmtResult Copy = buildSingleCopyAssign(*this, Loc, BaseType,
To, From,
/*CopyingBaseSubobject=*/true,
/*Copying=*/true);
if (Copy.isInvalid()) {
CopyAssignOperator->setInvalidDecl();
return;
}
// Success! Record the copy.
Statements.push_back(Copy.getAs<Expr>());
}
// Assign non-static members.
for (auto *Field : ClassDecl->fields()) {
// FIXME: We should form some kind of AST representation for the implied
// memcpy in a union copy operation.
if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
continue;
if (Field->isInvalidDecl()) {
Invalid = true;
continue;
}
// Check for members of reference type; we can't copy those.
if (Field->getType()->isReferenceType()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Check for members of const-qualified, non-class type.
QualType BaseType = Context.getBaseElementType(Field->getType());
if (!BaseType->getAs<RecordType>() && BaseType.isConstQualified()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Suppress assigning zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(Context) == 0)
continue;
QualType FieldType = Field->getType().getNonReferenceType();
if (FieldType->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Build references to the field in the object we're copying from and to.
CXXScopeSpec SS; // Intentionally empty
LookupResult MemberLookup(*this, Field->getDeclName(), Loc,
LookupMemberName);
MemberLookup.addDecl(Field);
MemberLookup.resolveKind();
MemberBuilder From(OtherRef, OtherRefType, /*IsArrow=*/false, MemberLookup);
MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/true, MemberLookup);
// Build the copy of this field.
StmtResult Copy = buildSingleCopyAssign(*this, Loc, FieldType,
To, From,
/*CopyingBaseSubobject=*/false,
/*Copying=*/true);
if (Copy.isInvalid()) {
CopyAssignOperator->setInvalidDecl();
return;
}
// Success! Record the copy.
Statements.push_back(Copy.getAs<Stmt>());
}
if (!Invalid) {
// Add a "return *this;"
ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else
Statements.push_back(Return.getAs<Stmt>());
}
if (Invalid) {
CopyAssignOperator->setInvalidDecl();
return;
}
StmtResult Body;
{
CompoundScopeRAII CompoundScope(*this);
Body = ActOnCompoundStmt(Loc, Loc, Statements,
/*isStmtExpr=*/false);
assert(!Body.isInvalid() && "Compound statement creation cannot fail");
}
CopyAssignOperator->setBody(Body.getAs<Stmt>());
CopyAssignOperator->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(CopyAssignOperator);
}
}
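// As a sketch (not from the original source), for
//   struct B { B &operator=(const B &); };
//   struct D : B { int n; };
// the body synthesized above is roughly
//   D &D::operator=(const D &other) {
//     static_cast<B *>(this)->B::operator=(static_cast<const B &>(other));
//     n = other.n;
//     return *this;
//   }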
CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
assert(ClassDecl->needsImplicitMoveAssignment());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXMoveAssignment);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
// Note: The following rules are largely analogous to the move
// constructor rules.
QualType ArgType = Context.getTypeDeclType(ClassDecl);
QualType RetType = Context.getLValueReferenceType(ArgType);
ArgType = Context.getRValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXMoveAssignment,
false);
// An implicitly-declared move assignment operator is an inline public
// member of its class.
DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXMethodDecl *MoveAssignment =
CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
/*isInline=*/true, Constexpr, SourceLocation());
MoveAssignment->setAccess(AS_public);
MoveAssignment->setDefaulted();
MoveAssignment->setImplicit();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXMoveAssignment,
MoveAssignment,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, MoveAssignment);
MoveAssignment->setType(Context.getFunctionType(RetType, ArgType, EPI));
// Add the parameter to the operator.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveAssignment,
ClassLoc, ClassLoc,
/*Id=*/nullptr, ArgType,
/*TInfo=*/nullptr, SC_None,
nullptr);
MoveAssignment->setParams(FromParam);
MoveAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForMoveAssignment()
? SpecialMemberIsTrivial(MoveAssignment, CXXMoveAssignment)
: ClassDecl->hasTrivialMoveAssignment());
// Note that we have added this move-assignment operator.
++ASTContext::NumImplicitMoveAssignmentOperatorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, MoveAssignment);
if (ShouldDeleteSpecialMember(MoveAssignment, CXXMoveAssignment)) {
ClassDecl->setImplicitMoveAssignmentIsDeleted();
SetDeclDeleted(MoveAssignment, ClassLoc);
}
if (S)
PushOnScopeChains(MoveAssignment, S, false);
ClassDecl->addDecl(MoveAssignment);
return MoveAssignment;
}
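// For illustration (sketch): a class with no user-declared copy operations,
// move operations, or destructor, such as
//   struct M { int *p; };
// receives the implicit declaration
//   M &M::operator=(M &&);   // inline public member, as noted above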
/// Check if we're implicitly defining a move assignment operator for a class
/// with virtual bases. Such a move assignment might move-assign the virtual
/// base multiple times.
static void checkMoveAssignmentForRepeatedMove(Sema &S, CXXRecordDecl *Class,
SourceLocation CurrentLocation) {
assert(!Class->isDependentContext() && "should not define dependent move");
// Only a virtual base could get implicitly move-assigned multiple times.
// Only a non-trivial move assignment can observe this. We only want to
// diagnose if we implicitly define an assignment operator that assigns
// two base classes, both of which move-assign the same virtual base.
if (Class->getNumVBases() == 0 || Class->hasTrivialMoveAssignment() ||
Class->getNumBases() < 2)
return;
llvm::SmallVector<CXXBaseSpecifier *, 16> Worklist;
typedef llvm::DenseMap<CXXRecordDecl*, CXXBaseSpecifier*> VBaseMap;
VBaseMap VBases;
for (auto &BI : Class->bases()) {
Worklist.push_back(&BI);
while (!Worklist.empty()) {
CXXBaseSpecifier *BaseSpec = Worklist.pop_back_val();
CXXRecordDecl *Base = BaseSpec->getType()->getAsCXXRecordDecl();
// If the base has no non-trivial move assignment operators,
// we don't care about moves from it.
if (!Base->hasNonTrivialMoveAssignment())
continue;
// If there's nothing virtual here, skip it.
if (!BaseSpec->isVirtual() && !Base->getNumVBases())
continue;
// If we're not actually going to call a move assignment for this base,
// or the selected move assignment is trivial, skip it.
Sema::SpecialMemberOverloadResult SMOR =
S.LookupSpecialMember(Base, Sema::CXXMoveAssignment,
/*ConstArg*/false, /*VolatileArg*/false,
/*RValueThis*/true, /*ConstThis*/false,
/*VolatileThis*/false);
if (!SMOR.getMethod() || SMOR.getMethod()->isTrivial() ||
!SMOR.getMethod()->isMoveAssignmentOperator())
continue;
if (BaseSpec->isVirtual()) {
// We're going to move-assign this virtual base, and its move
// assignment operator is not trivial. If this can happen for
// multiple distinct direct bases of Class, diagnose it. (If it
// only happens in one base, we'll diagnose it when synthesizing
// that base class's move assignment operator.)
CXXBaseSpecifier *&Existing =
VBases.insert(std::make_pair(Base->getCanonicalDecl(), &BI))
.first->second;
if (Existing && Existing != &BI) {
S.Diag(CurrentLocation, diag::warn_vbase_moved_multiple_times)
<< Class << Base;
S.Diag(Existing->getLocStart(), diag::note_vbase_moved_here)
<< (Base->getCanonicalDecl() ==
Existing->getType()->getAsCXXRecordDecl()->getCanonicalDecl())
<< Base << Existing->getType() << Existing->getSourceRange();
S.Diag(BI.getLocStart(), diag::note_vbase_moved_here)
<< (Base->getCanonicalDecl() ==
BI.getType()->getAsCXXRecordDecl()->getCanonicalDecl())
<< Base << BI.getType() << BaseSpec->getSourceRange();
// Only diagnose each vbase once.
Existing = nullptr;
}
} else {
// Only walk over bases that have defaulted move assignment operators.
// We assume that any user-provided move assignment operator handles
// the multiple-moves-of-vbase case itself somehow.
if (!SMOR.getMethod()->isDefaulted())
continue;
// We're going to move the base classes of Base. Add them to the list.
for (auto &BI : Base->bases())
Worklist.push_back(&BI);
}
}
}
}
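// A hypothetical example that triggers the diagnostic above:
//   struct V { V &operator=(V &&); };   // non-trivial move assignment
//   struct B1 : virtual V {};
//   struct B2 : virtual V {};
//   struct D : B1, B2 {};
// Defining D's implicit move assignment warns that the virtual base 'V'
// may be move-assigned multiple times, once via each of B1 and B2.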
void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
CXXMethodDecl *MoveAssignOperator) {
assert((MoveAssignOperator->isDefaulted() &&
MoveAssignOperator->isOverloadedOperator() &&
MoveAssignOperator->getOverloadedOperator() == OO_Equal &&
!MoveAssignOperator->doesThisDeclarationHaveABody() &&
!MoveAssignOperator->isDeleted()) &&
"DefineImplicitMoveAssignment called for wrong function");
if (MoveAssignOperator->willHaveBody() || MoveAssignOperator->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = MoveAssignOperator->getParent();
if (ClassDecl->isInvalidDecl()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// C++0x [class.copy]p28:
// The implicitly-defined or explicitly-defaulted move assignment operator
// for a non-union class X performs memberwise move assignment of its
// subobjects. The direct base
// classes of X are assigned first, in the order of their declaration in the
// base-specifier-list, and then the immediate non-static data members of X
// are assigned, in the order in which they were declared in the class
// definition.
// Issue a warning if our implicit move assignment operator will move
// from a virtual base more than once.
checkMoveAssignmentForRepeatedMove(*this, ClassDecl, CurrentLocation);
SynthesizedFunctionScope Scope(*this, MoveAssignOperator);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
MoveAssignOperator->getType()->castAs<FunctionProtoType>());
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// The statements that form the synthesized function body.
SmallVector<Stmt*, 8> Statements;
// The parameter for the "other" object, which we are moving from.
ParmVarDecl *Other = MoveAssignOperator->getParamDecl(0);
QualType OtherRefType = Other->getType()->
getAs<RValueReferenceType>()->getPointeeType();
assert(!OtherRefType.getQualifiers() &&
"Bad argument type of defaulted move assignment");
// Our location for everything implicitly-generated.
SourceLocation Loc = MoveAssignOperator->getLocEnd().isValid()
? MoveAssignOperator->getLocEnd()
: MoveAssignOperator->getLocation();
// Builds a reference to the "other" object.
RefBuilder OtherRef(Other, OtherRefType);
// Cast to rvalue.
MoveCastBuilder MoveOther(OtherRef);
// Builds the "this" pointer.
ThisBuilder This;
// Assign base classes.
bool Invalid = false;
for (auto &Base : ClassDecl->bases()) {
// C++11 [class.copy]p28:
// It is unspecified whether subobjects representing virtual base classes
// are assigned more than once by the implicitly-defined copy assignment
// operator.
// FIXME: Do not assign to a vbase that will be assigned by some other base
// class. For a move-assignment, this can result in the vbase being moved
// multiple times.
// Form the assignment:
// static_cast<Base*>(this)->Base::operator=(static_cast<Base&&>(other));
QualType BaseType = Base.getType().getUnqualifiedType();
if (!BaseType->isRecordType()) {
Invalid = true;
continue;
}
CXXCastPath BasePath;
BasePath.push_back(&Base);
// Construct the "from" expression, which is an implicit cast to the
// appropriately-qualified base type.
CastBuilder From(OtherRef, BaseType, VK_XValue, BasePath);
// Dereference "this".
DerefBuilder DerefThis(This);
// Implicitly cast "this" to the appropriately-qualified base type.
CastBuilder To(DerefThis,
Context.getCVRQualifiedType(
BaseType, MoveAssignOperator->getTypeQualifiers()),
VK_LValue, BasePath);
// Build the move.
StmtResult Move = buildSingleCopyAssign(*this, Loc, BaseType,
To, From,
/*CopyingBaseSubobject=*/true,
/*Copying=*/false);
if (Move.isInvalid()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// Success! Record the move.
Statements.push_back(Move.getAs<Expr>());
}
// Assign non-static members.
for (auto *Field : ClassDecl->fields()) {
// FIXME: We should form some kind of AST representation for the implied
// memcpy in a union copy operation.
if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
continue;
if (Field->isInvalidDecl()) {
Invalid = true;
continue;
}
// Check for members of reference type; we can't move those.
if (Field->getType()->isReferenceType()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Check for members of const-qualified, non-class type.
QualType BaseType = Context.getBaseElementType(Field->getType());
if (!BaseType->getAs<RecordType>() && BaseType.isConstQualified()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Suppress assigning zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(Context) == 0)
continue;
QualType FieldType = Field->getType().getNonReferenceType();
if (FieldType->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Build references to the field in the object we're copying from and to.
LookupResult MemberLookup(*this, Field->getDeclName(), Loc,
LookupMemberName);
MemberLookup.addDecl(Field);
MemberLookup.resolveKind();
MemberBuilder From(MoveOther, OtherRefType,
/*IsArrow=*/false, MemberLookup);
MemberBuilder To(This, getCurrentThisType(),
/*IsArrow=*/true, MemberLookup);
assert(!From.build(*this, Loc)->isLValue() && // could be xvalue or prvalue
"Member reference with rvalue base must be rvalue except for reference "
"members, which aren't allowed for move assignment.");
// Build the move of this field.
StmtResult Move = buildSingleCopyAssign(*this, Loc, FieldType,
To, From,
/*CopyingBaseSubobject=*/false,
/*Copying=*/false);
if (Move.isInvalid()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// Success! Record the move.
Statements.push_back(Move.getAs<Stmt>());
}
if (!Invalid) {
// Add a "return *this;"
ExprResult ThisObj =
CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else
Statements.push_back(Return.getAs<Stmt>());
}
if (Invalid) {
MoveAssignOperator->setInvalidDecl();
return;
}
StmtResult Body;
{
CompoundScopeRAII CompoundScope(*this);
Body = ActOnCompoundStmt(Loc, Loc, Statements,
/*isStmtExpr=*/false);
assert(!Body.isInvalid() && "Compound statement creation cannot fail");
}
MoveAssignOperator->setBody(Body.getAs<Stmt>());
MoveAssignOperator->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(MoveAssignOperator);
}
}
CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
CXXRecordDecl *ClassDecl) {
// C++ [class.copy]p4:
// If the class definition does not explicitly declare a copy
// constructor, one is declared implicitly.
assert(ClassDecl->needsImplicitCopyConstructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXCopyConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ClassType = Context.getTypeDeclType(ClassDecl);
QualType ArgType = ClassType;
bool Const = ClassDecl->implicitCopyConstructorHasConstParam();
if (Const)
ArgType = ArgType.withConst();
ArgType = Context.getLValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXCopyConstructor,
Const);
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(ClassType));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
// An implicitly-declared copy constructor is an inline public
// member of its class.
CXXConstructorDecl *CopyConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
/*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true,
Constexpr);
CopyConstructor->setAccess(AS_public);
CopyConstructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXCopyConstructor,
CopyConstructor,
/* ConstRHS */ Const,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, CopyConstructor);
CopyConstructor->setType(
Context.getFunctionType(Context.VoidTy, ArgType, EPI));
// Add the parameter to the constructor.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyConstructor,
ClassLoc, ClassLoc,
/*IdentifierInfo=*/nullptr,
ArgType, /*TInfo=*/nullptr,
SC_None, nullptr);
CopyConstructor->setParams(FromParam);
CopyConstructor->setTrivial(
ClassDecl->needsOverloadResolutionForCopyConstructor()
? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor)
: ClassDecl->hasTrivialCopyConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitCopyConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, CopyConstructor);
- if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor))
+ if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor)) {
+ ClassDecl->setImplicitCopyConstructorIsDeleted();
SetDeclDeleted(CopyConstructor, ClassLoc);
+ }
if (S)
PushOnScopeChains(CopyConstructor, S, false);
ClassDecl->addDecl(CopyConstructor);
return CopyConstructor;
}
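// For illustration (hypothetical user code): a user-declared move
// constructor causes the copy constructor declared here to be deleted,
// which the new call to setImplicitCopyConstructorIsDeleted() records on
// the class:
//   struct M { M(M &&); };
//   void f(M &m) { M c(m); }   // error: call to implicitly-deleted copy
//                              // constructor of 'M'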
void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *CopyConstructor) {
assert((CopyConstructor->isDefaulted() &&
CopyConstructor->isCopyConstructor() &&
!CopyConstructor->doesThisDeclarationHaveABody() &&
!CopyConstructor->isDeleted()) &&
"DefineImplicitCopyConstructor - call it for implicit copy ctor");
if (CopyConstructor->willHaveBody() || CopyConstructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = CopyConstructor->getParent();
assert(ClassDecl && "DefineImplicitCopyConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, CopyConstructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
CopyConstructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// C++11 [class.copy]p7:
// The [definition of an implicitly declared copy constructor] is
// deprecated if the class has a user-declared copy assignment operator
// or a user-declared destructor.
if (getLangOpts().CPlusPlus11 && CopyConstructor->isImplicit())
diagnoseDeprecatedCopyOperation(*this, CopyConstructor);
if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false)) {
CopyConstructor->setInvalidDecl();
} else {
SourceLocation Loc = CopyConstructor->getLocEnd().isValid()
? CopyConstructor->getLocEnd()
: CopyConstructor->getLocation();
Sema::CompoundScopeRAII CompoundScope(*this);
CopyConstructor->setBody(
ActOnCompoundStmt(Loc, Loc, None, /*isStmtExpr=*/false).getAs<Stmt>());
CopyConstructor->markUsed(Context);
}
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(CopyConstructor);
}
}
CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
CXXRecordDecl *ClassDecl) {
assert(ClassDecl->needsImplicitMoveConstructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXMoveConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ClassType = Context.getTypeDeclType(ClassDecl);
QualType ArgType = Context.getRValueReferenceType(ClassType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXMoveConstructor,
false);
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(ClassType));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
// C++11 [class.copy]p11:
// An implicitly-declared copy/move constructor is an inline public
// member of its class.
CXXConstructorDecl *MoveConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
/*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true,
Constexpr);
MoveConstructor->setAccess(AS_public);
MoveConstructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXMoveConstructor,
MoveConstructor,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, MoveConstructor);
MoveConstructor->setType(
Context.getFunctionType(Context.VoidTy, ArgType, EPI));
// Add the parameter to the constructor.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveConstructor,
ClassLoc, ClassLoc,
/*IdentifierInfo=*/nullptr,
ArgType, /*TInfo=*/nullptr,
SC_None, nullptr);
MoveConstructor->setParams(FromParam);
MoveConstructor->setTrivial(
ClassDecl->needsOverloadResolutionForMoveConstructor()
? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor)
: ClassDecl->hasTrivialMoveConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitMoveConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, MoveConstructor);
if (ShouldDeleteSpecialMember(MoveConstructor, CXXMoveConstructor)) {
ClassDecl->setImplicitMoveConstructorIsDeleted();
SetDeclDeleted(MoveConstructor, ClassLoc);
}
if (S)
PushOnScopeChains(MoveConstructor, S, false);
ClassDecl->addDecl(MoveConstructor);
return MoveConstructor;
}
void Sema::DefineImplicitMoveConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *MoveConstructor) {
assert((MoveConstructor->isDefaulted() &&
MoveConstructor->isMoveConstructor() &&
!MoveConstructor->doesThisDeclarationHaveABody() &&
!MoveConstructor->isDeleted()) &&
"DefineImplicitMoveConstructor - call it for implicit move ctor");
if (MoveConstructor->willHaveBody() || MoveConstructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = MoveConstructor->getParent();
assert(ClassDecl && "DefineImplicitMoveConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, MoveConstructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
MoveConstructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false)) {
MoveConstructor->setInvalidDecl();
} else {
SourceLocation Loc = MoveConstructor->getLocEnd().isValid()
? MoveConstructor->getLocEnd()
: MoveConstructor->getLocation();
Sema::CompoundScopeRAII CompoundScope(*this);
MoveConstructor->setBody(ActOnCompoundStmt(
Loc, Loc, None, /*isStmtExpr=*/ false).getAs<Stmt>());
MoveConstructor->markUsed(Context);
}
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(MoveConstructor);
}
}
bool Sema::isImplicitlyDeleted(FunctionDecl *FD) {
return FD->isDeleted() && FD->isDefaulted() && isa<CXXMethodDecl>(FD);
}
void Sema::DefineImplicitLambdaToFunctionPointerConversion(
SourceLocation CurrentLocation,
CXXConversionDecl *Conv) {
SynthesizedFunctionScope Scope(*this, Conv);
CXXRecordDecl *Lambda = Conv->getParent();
CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// If we are defining a specialization of a conversion to function-ptr,
// cache the deduced template arguments for this specialization so that
// we can use them to retrieve the corresponding call-operator and
// static-invoker.
const TemplateArgumentList *DeducedTemplateArgs = nullptr;
// Retrieve the corresponding call-operator specialization.
if (Lambda->isGenericLambda()) {
assert(Conv->isFunctionTemplateSpecialization());
FunctionTemplateDecl *CallOpTemplate =
CallOp->getDescribedFunctionTemplate();
DeducedTemplateArgs = Conv->getTemplateSpecializationArgs();
void *InsertPos = nullptr;
FunctionDecl *CallOpSpec = CallOpTemplate->findSpecialization(
DeducedTemplateArgs->asArray(),
InsertPos);
assert(CallOpSpec &&
"Conversion operator must have a corresponding call operator");
CallOp = cast<CXXMethodDecl>(CallOpSpec);
}
// Mark the call operator referenced (and add to pending instantiations
// if necessary).
// For both the conversion and static-invoker template specializations
// we construct their bodies in this function, so no need to add them
// to the PendingInstantiations.
MarkFunctionReferenced(CurrentLocation, CallOp);
// Retrieve the static invoker...
CXXMethodDecl *Invoker = Lambda->getLambdaStaticInvoker();
// ... and get the corresponding specialization for a generic lambda.
if (Lambda->isGenericLambda()) {
assert(DeducedTemplateArgs &&
"Must have deduced template arguments from Conversion Operator");
FunctionTemplateDecl *InvokeTemplate =
Invoker->getDescribedFunctionTemplate();
void *InsertPos = nullptr;
FunctionDecl *InvokeSpec = InvokeTemplate->findSpecialization(
DeducedTemplateArgs->asArray(),
InsertPos);
assert(InvokeSpec &&
"Must have a corresponding static invoker specialization");
Invoker = cast<CXXMethodDecl>(InvokeSpec);
}
// Construct the body of the conversion function { return __invoke; }.
Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
VK_LValue, Conv->getLocation()).get();
assert(FunctionRef && "Can't refer to __invoke function?");
Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
Conv->setBody(new (Context) CompoundStmt(Context, Return,
Conv->getLocation(),
Conv->getLocation()));
Conv->markUsed(Context);
Conv->setReferenced();
// Fill in the __invoke function with a dummy implementation. IR generation
// will fill in the actual details.
Invoker->markUsed(Context);
Invoker->setReferenced();
Invoker->setBody(new (Context) CompoundStmt(Conv->getLocation()));
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Conv);
L->CompletedImplicitDefinition(Invoker);
}
}
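// Usage sketch (not from the original source):
//   auto l = [](int x) { return x + 1; };
//   int (*fp)(int) = l;   // calls the conversion function defined here;
//                         // its body is just 'return __invoke;', and the
//                         // static invoker's real body comes from IR gen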
void Sema::DefineImplicitLambdaToBlockPointerConversion(
SourceLocation CurrentLocation,
CXXConversionDecl *Conv)
{
assert(!Conv->getParent()->isGenericLambda());
SynthesizedFunctionScope Scope(*this, Conv);
// Copy-initialize the lambda object as needed to capture it.
Expr *This = ActOnCXXThis(CurrentLocation).get();
Expr *DerefThis = CreateBuiltinUnaryOp(CurrentLocation, UO_Deref, This).get();
ExprResult BuildBlock = BuildBlockForLambdaConversion(CurrentLocation,
Conv->getLocation(),
Conv, DerefThis);
// If we're not under ARC, make sure we still get the _Block_copy/autorelease
// behavior. Note that only the general conversion function does this
// (since it's unusable otherwise); in the case where we inline the
// block literal, it has block literal lifetime semantics.
if (!BuildBlock.isInvalid() && !getLangOpts().ObjCAutoRefCount)
BuildBlock = ImplicitCastExpr::Create(Context, BuildBlock.get()->getType(),
CK_CopyAndAutoreleaseBlockObject,
BuildBlock.get(), nullptr, VK_RValue);
if (BuildBlock.isInvalid()) {
Diag(CurrentLocation, diag::note_lambda_to_block_conv);
Conv->setInvalidDecl();
return;
}
// Create the return statement that returns the block from the conversion
// function.
StmtResult Return = BuildReturnStmt(Conv->getLocation(), BuildBlock.get());
if (Return.isInvalid()) {
Diag(CurrentLocation, diag::note_lambda_to_block_conv);
Conv->setInvalidDecl();
return;
}
// Set the body of the conversion function.
Stmt *ReturnS = Return.get();
Conv->setBody(new (Context) CompoundStmt(Context, ReturnS,
Conv->getLocation(),
Conv->getLocation()));
Conv->markUsed(Context);
// We're done; notify the mutation listener, if any.
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Conv);
}
}
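// Usage sketch (Objective-C++ with blocks enabled; illustrative only):
//   void (^blk)(void) = []{};   // the conversion defined here returns a
//                               // block that forwards to the lambda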
/// \brief Determine whether the given list of arguments contains exactly one
/// "real" (non-default) argument.
static bool hasOneRealArgument(MultiExprArg Args) {
switch (Args.size()) {
case 0:
return false;
default:
if (!Args[1]->isDefaultArgument())
return false;
// fall through
case 1:
return !Args[0]->isDefaultArgument();
}
return false;
}
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
NamedDecl *FoundDecl,
CXXConstructorDecl *Constructor,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
bool Elidable = false;
// C++0x [class.copy]p34:
// When certain criteria are met, an implementation is allowed to
// omit the copy/move construction of a class object, even if the
// copy/move constructor and/or destructor for the object have
// side effects. [...]
// - when a temporary class object that has not been bound to a
// reference (12.2) would be copied/moved to a class object
// with the same cv-unqualified type, the copy/move operation
// can be omitted by constructing the temporary object
// directly into the target of the omitted copy/move
if (ConstructKind == CXXConstructExpr::CK_Complete && Constructor &&
Constructor->isCopyOrMoveConstructor() && hasOneRealArgument(ExprArgs)) {
Expr *SubExpr = ExprArgs[0];
Elidable = SubExpr->isTemporaryObject(
Context, cast<CXXRecordDecl>(FoundDecl->getDeclContext()));
}
return BuildCXXConstructExpr(ConstructLoc, DeclInitType,
FoundDecl, Constructor,
Elidable, ExprArgs, HadMultipleCandidates,
IsListInitialization,
IsStdInitListInitialization, RequiresZeroInit,
ConstructKind, ParenRange);
}
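// For illustration (sketch): given
//   struct A { A(); A(const A &); };
//   A a = A();
// the copy from the temporary satisfies the check above, so the
// CXXConstructExpr is marked elidable and the temporary can be constructed
// directly into 'a'.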
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
NamedDecl *FoundDecl,
CXXConstructorDecl *Constructor,
bool Elidable,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(FoundDecl)) {
Constructor = findInheritingConstructor(ConstructLoc, Constructor, Shadow);
if (DiagnoseUseOfDecl(Constructor, ConstructLoc))
return ExprError();
}
return BuildCXXConstructExpr(
ConstructLoc, DeclInitType, Constructor, Elidable, ExprArgs,
HadMultipleCandidates, IsListInitialization, IsStdInitListInitialization,
RequiresZeroInit, ConstructKind, ParenRange);
}
/// BuildCXXConstructExpr - Creates a complete call to a constructor,
/// including handling of its default argument expressions.
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
CXXConstructorDecl *Constructor,
bool Elidable,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
assert(declaresSameEntity(
Constructor->getParent(),
DeclInitType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) &&
"given constructor for wrong type");
MarkFunctionReferenced(ConstructLoc, Constructor);
if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor))
return ExprError();
return CXXConstructExpr::Create(
Context, DeclInitType, ConstructLoc, Constructor, Elidable,
ExprArgs, HadMultipleCandidates, IsListInitialization,
IsStdInitListInitialization, RequiresZeroInit,
static_cast<CXXConstructExpr::ConstructionKind>(ConstructKind),
ParenRange);
}
ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
assert(Field->hasInClassInitializer());
// If we already have the in-class initializer nothing needs to be done.
if (Field->getInClassInitializer())
return CXXDefaultInitExpr::Create(Context, Loc, Field);
// If we might have already tried and failed to instantiate, don't try again.
if (Field->isInvalidDecl())
return ExprError();
// Maybe we haven't instantiated the in-class initializer. Go check the
// pattern FieldDecl to see if it has one.
CXXRecordDecl *ParentRD = cast<CXXRecordDecl>(Field->getParent());
if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) {
CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern();
DeclContext::lookup_result Lookup =
ClassPattern->lookup(Field->getDeclName());
// Lookup can return at most two results: the pattern for the field, or the
// injected class name of the parent record. No other member can have the
// same name as the field.
// In modules mode, lookup can return multiple results (coming from
// different modules).
assert((getLangOpts().Modules || (!Lookup.empty() && Lookup.size() <= 2)) &&
"more than two lookup results for field name");
FieldDecl *Pattern = dyn_cast<FieldDecl>(Lookup[0]);
if (!Pattern) {
assert(isa<CXXRecordDecl>(Lookup[0]) &&
"cannot have other non-field member with same name");
for (auto L : Lookup)
if (isa<FieldDecl>(L)) {
Pattern = cast<FieldDecl>(L);
break;
}
assert(Pattern && "We must have set the Pattern!");
}
if (InstantiateInClassInitializer(Loc, Field, Pattern,
getTemplateInstantiationArgs(Field))) {
// Don't diagnose this again.
Field->setInvalidDecl();
return ExprError();
}
return CXXDefaultInitExpr::Create(Context, Loc, Field);
}
// DR1351:
// If the brace-or-equal-initializer of a non-static data member
// invokes a defaulted default constructor of its class or of an
// enclosing class in a potentially evaluated subexpression, the
// program is ill-formed.
//
// This resolution is unworkable: the exception specification of the
// default constructor can be needed in an unevaluated context, in
// particular, in the operand of a noexcept-expression, and we can be
// unable to compute an exception specification for an enclosed class.
//
// Any attempt to resolve the exception specification of a defaulted default
// constructor before the initializer is lexically complete will ultimately
// come here at which point we can diagnose it.
RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext();
Diag(Loc, diag::err_in_class_initializer_not_yet_parsed)
<< OutermostClass << Field;
Diag(Field->getLocEnd(), diag::note_in_class_initializer_not_yet_parsed);
// Recover by marking the field invalid, unless we're in a SFINAE context.
if (!isSFINAEContext())
Field->setInvalidDecl();
return ExprError();
}
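// A sketch of the ill-formed case diagnosed above (hypothetical code):
//   struct S {
//     struct T { int n = 0; };          // NSDMI parsed at the end of 'S'
//     static_assert(noexcept(T()), ""); // needs T()'s exception spec, and
//   };                                  // hence n's initializer, too early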
void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) {
if (VD->isInvalidDecl()) return;
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(Record->getDecl());
if (ClassDecl->isInvalidDecl()) return;
if (ClassDecl->hasIrrelevantDestructor()) return;
if (ClassDecl->isDependentContext()) return;
CXXDestructorDecl *Destructor = LookupDestructor(ClassDecl);
MarkFunctionReferenced(VD->getLocation(), Destructor);
CheckDestructorAccess(VD->getLocation(), Destructor,
PDiag(diag::err_access_dtor_var)
<< VD->getDeclName()
<< VD->getType());
DiagnoseUseOfDecl(Destructor, VD->getLocation());
if (Destructor->isTrivial()) return;
if (!VD->hasGlobalStorage()) return;
// Emit warning for non-trivial dtor in global scope (a real global,
// class-static, function-static).
Diag(VD->getLocation(), diag::warn_exit_time_destructor);
// TODO: this should be re-enabled for static locals by !CXAAtExit
if (!VD->isStaticLocal())
Diag(VD->getLocation(), diag::warn_global_destructor);
}
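// For illustration (hypothetical code): with the relevant warnings enabled,
//   struct D { ~D(); };   // non-trivial destructor
//   D d;                  // global: warns about an exit-time destructor
//                         // (and, per above, a global destructor)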
/// \brief Given a constructor and the set of arguments provided for the
/// constructor, convert the arguments and add any required default arguments
/// to form a proper call to this constructor.
///
/// \returns true if an error occurred, false otherwise.
bool
Sema::CompleteConstructorCall(CXXConstructorDecl *Constructor,
MultiExprArg ArgsPtr,
SourceLocation Loc,
SmallVectorImpl<Expr*> &ConvertedArgs,
bool AllowExplicit,
bool IsListInitialization) {
// FIXME: This duplicates a lot of code from Sema::ConvertArgumentsForCall.
unsigned NumArgs = ArgsPtr.size();
Expr **Args = ArgsPtr.data();
const FunctionProtoType *Proto
= Constructor->getType()->getAs<FunctionProtoType>();
assert(Proto && "Constructor without a prototype?");
unsigned NumParams = Proto->getNumParams();
// If too few arguments are available, we'll fill in the rest with defaults.
if (NumArgs < NumParams)
ConvertedArgs.reserve(NumParams);
else
ConvertedArgs.reserve(NumArgs);
VariadicCallType CallType =
Proto->isVariadic() ? VariadicConstructor : VariadicDoesNotApply;
SmallVector<Expr *, 8> AllArgs;
bool Invalid = GatherArgumentsForCall(Loc, Constructor,
Proto, 0,
llvm::makeArrayRef(Args, NumArgs),
AllArgs,
CallType, AllowExplicit,
IsListInitialization);
ConvertedArgs.append(AllArgs.begin(), AllArgs.end());
DiagnoseSentinelCalls(Constructor, Loc, AllArgs);
CheckConstructorCall(Constructor,
llvm::makeArrayRef(AllArgs.data(), AllArgs.size()),
Proto, Loc);
return Invalid;
}
static inline bool
CheckOperatorNewDeleteDeclarationScope(Sema &SemaRef,
const FunctionDecl *FnDecl) {
const DeclContext *DC = FnDecl->getDeclContext()->getRedeclContext();
if (isa<NamespaceDecl>(DC)) {
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_declared_in_namespace)
<< FnDecl->getDeclName();
}
if (isa<TranslationUnitDecl>(DC) &&
FnDecl->getStorageClass() == SC_Static) {
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_declared_static)
<< FnDecl->getDeclName();
}
return false;
}
static inline bool
CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
CanQualType ExpectedResultType,
CanQualType ExpectedFirstParamType,
unsigned DependentParamTypeDiag,
unsigned InvalidParamTypeDiag) {
QualType ResultType =
FnDecl->getType()->getAs<FunctionType>()->getReturnType();
// Check that the result type is not dependent.
if (ResultType->isDependentType())
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_dependent_result_type)
<< FnDecl->getDeclName() << ExpectedResultType;
// Check that the result type is what we expect.
if (SemaRef.Context.getCanonicalType(ResultType) != ExpectedResultType)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_invalid_result_type)
<< FnDecl->getDeclName() << ExpectedResultType;
// A function template must have at least 2 parameters.
if (FnDecl->getDescribedFunctionTemplate() && FnDecl->getNumParams() < 2)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_template_too_few_parameters)
<< FnDecl->getDeclName();
// The function decl must have at least 1 parameter.
if (FnDecl->getNumParams() == 0)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_too_few_parameters)
<< FnDecl->getDeclName();
// Check the first parameter type is not dependent.
QualType FirstParamType = FnDecl->getParamDecl(0)->getType();
if (FirstParamType->isDependentType())
return SemaRef.Diag(FnDecl->getLocation(), DependentParamTypeDiag)
<< FnDecl->getDeclName() << ExpectedFirstParamType;
// Check that the first parameter type is what we expect.
if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() !=
ExpectedFirstParamType)
return SemaRef.Diag(FnDecl->getLocation(), InvalidParamTypeDiag)
<< FnDecl->getDeclName() << ExpectedFirstParamType;
return false;
}
static bool
CheckOperatorNewDeclaration(Sema &SemaRef, const FunctionDecl *FnDecl) {
// C++ [basic.stc.dynamic.allocation]p1:
// A program is ill-formed if an allocation function is declared in a
// namespace scope other than global scope or declared static in global
// scope.
if (CheckOperatorNewDeleteDeclarationScope(SemaRef, FnDecl))
return true;
CanQualType SizeTy =
SemaRef.Context.getCanonicalType(SemaRef.Context.getSizeType());
// C++ [basic.stc.dynamic.allocation]p1:
// The return type shall be void*. The first parameter shall have type
// std::size_t.
if (CheckOperatorNewDeleteTypes(SemaRef, FnDecl, SemaRef.Context.VoidPtrTy,
SizeTy,
diag::err_operator_new_dependent_param_type,
diag::err_operator_new_param_type))
return true;
// C++ [basic.stc.dynamic.allocation]p1:
// The first parameter shall not have an associated default argument.
if (FnDecl->getParamDecl(0)->hasDefaultArg())
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_default_arg)
<< FnDecl->getDeclName() << FnDecl->getParamDecl(0)->getDefaultArgRange();
return false;
}
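// Illustrative declarations checked by the code above (not from the source):
//   void *operator new(std::size_t);                  // OK at global scope
//   namespace N { void *operator new(std::size_t); }  // error: namespace scope
//   static void *operator new(std::size_t);  // error: static in global scope
//   int operator new(std::size_t);           // error: must return void*
//   void *operator new(std::size_t = 16);    // error: no default argument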
static bool
CheckOperatorDeleteDeclaration(Sema &SemaRef, FunctionDecl *FnDecl) {
// C++ [basic.stc.dynamic.deallocation]p1:
// A program is ill-formed if deallocation functions are declared in a
// namespace scope other than global scope or declared static in global
// scope.
if (CheckOperatorNewDeleteDeclarationScope(SemaRef, FnDecl))
return true;
// C++ [basic.stc.dynamic.deallocation]p2:
// Each deallocation function shall return void and its first parameter
// shall be void*.
if (CheckOperatorNewDeleteTypes(SemaRef, FnDecl, SemaRef.Context.VoidTy,
SemaRef.Context.VoidPtrTy,
diag::err_operator_delete_dependent_param_type,
diag::err_operator_delete_param_type))
return true;
return false;
}
/// CheckOverloadedOperatorDeclaration - Check whether the declaration
/// of this overloaded operator is well-formed. If so, returns false;
/// otherwise, emits appropriate diagnostics and returns true.
bool Sema::CheckOverloadedOperatorDeclaration(FunctionDecl *FnDecl) {
assert(FnDecl && FnDecl->isOverloadedOperator() &&
"Expected an overloaded operator declaration");
OverloadedOperatorKind Op = FnDecl->getOverloadedOperator();
// C++ [over.oper]p5:
// The allocation and deallocation functions, operator new,
// operator new[], operator delete and operator delete[], are
// described completely in 3.7.3. The attributes and restrictions
// found in the rest of this subclause do not apply to them unless
// explicitly stated in 3.7.3.
if (Op == OO_Delete || Op == OO_Array_Delete)
return CheckOperatorDeleteDeclaration(*this, FnDecl);
if (Op == OO_New || Op == OO_Array_New)
return CheckOperatorNewDeclaration(*this, FnDecl);
// C++ [over.oper]p6:
// An operator function shall either be a non-static member
// function or be a non-member function and have at least one
// parameter whose type is a class, a reference to a class, an
// enumeration, or a reference to an enumeration.
if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(FnDecl)) {
if (MethodDecl->isStatic())
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_static) << FnDecl->getDeclName();
} else {
bool ClassOrEnumParam = false;
for (auto Param : FnDecl->parameters()) {
QualType ParamType = Param->getType().getNonReferenceType();
if (ParamType->isDependentType() || ParamType->isRecordType() ||
ParamType->isEnumeralType()) {
ClassOrEnumParam = true;
break;
}
}
if (!ClassOrEnumParam)
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_needs_class_or_enum)
<< FnDecl->getDeclName();
}
// C++ [over.oper]p8:
// An operator function cannot have default arguments (8.3.6),
// except where explicitly stated below.
//
// Only the function-call operator allows default arguments
// (C++ [over.call]p1).
if (Op != OO_Call) {
for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg())
return Diag(Param->getLocation(),
diag::err_operator_overload_default_arg)
<< FnDecl->getDeclName() << Param->getDefaultArgRange();
}
}
static const bool OperatorUses[NUM_OVERLOADED_OPERATORS][3] = {
{ false, false, false }
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
, { Unary, Binary, MemberOnly }
#include "clang/Basic/OperatorKinds.def"
};
bool CanBeUnaryOperator = OperatorUses[Op][0];
bool CanBeBinaryOperator = OperatorUses[Op][1];
bool MustBeMemberOperator = OperatorUses[Op][2];
// C++ [over.oper]p8:
// [...] Operator functions cannot have more or fewer parameters
// than the number required for the corresponding operator, as
// described in the rest of this subclause.
unsigned NumParams = FnDecl->getNumParams()
+ (isa<CXXMethodDecl>(FnDecl)? 1 : 0);
if (Op != OO_Call &&
((NumParams == 1 && !CanBeUnaryOperator) ||
(NumParams == 2 && !CanBeBinaryOperator) ||
(NumParams < 1) || (NumParams > 2))) {
// We have the wrong number of parameters.
unsigned ErrorKind;
if (CanBeUnaryOperator && CanBeBinaryOperator) {
ErrorKind = 2; // 2 -> unary or binary.
} else if (CanBeUnaryOperator) {
ErrorKind = 0; // 0 -> unary
} else {
assert(CanBeBinaryOperator &&
"All non-call overloaded operators are unary or binary!");
ErrorKind = 1; // 1 -> binary
}
return Diag(FnDecl->getLocation(), diag::err_operator_overload_must_be)
<< FnDecl->getDeclName() << NumParams << ErrorKind;
}
// Overloaded operators other than operator() cannot be variadic.
if (Op != OO_Call &&
FnDecl->getType()->getAs<FunctionProtoType>()->isVariadic()) {
return Diag(FnDecl->getLocation(), diag::err_operator_overload_variadic)
<< FnDecl->getDeclName();
}
// Some operators must be non-static member functions.
if (MustBeMemberOperator && !isa<CXXMethodDecl>(FnDecl)) {
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_must_be_member)
<< FnDecl->getDeclName();
}
// C++ [over.inc]p1:
// The user-defined function called operator++ implements the
// prefix and postfix ++ operator. If this function is a member
// function with no parameters, or a non-member function with one
// parameter of class or enumeration type, it defines the prefix
// increment operator ++ for objects of that type. If the function
// is a member function with one parameter (which shall be of type
// int) or a non-member function with two parameters (the second
// of which shall be of type int), it defines the postfix
// increment operator ++ for objects of that type.
if ((Op == OO_PlusPlus || Op == OO_MinusMinus) && NumParams == 2) {
ParmVarDecl *LastParam = FnDecl->getParamDecl(FnDecl->getNumParams() - 1);
QualType ParamType = LastParam->getType();
if (!ParamType->isSpecificBuiltinType(BuiltinType::Int) &&
!ParamType->isDependentType())
return Diag(LastParam->getLocation(),
diag::err_operator_overload_post_incdec_must_be_int)
<< LastParam->getType() << (Op == OO_MinusMinus);
}
return false;
}
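// Illustrative declarations rejected by the checks above (hypothetical):
//   struct X {
//     static X operator+(X, X);   // error: operator as a static member
//     X operator-(X, X);          // error: too many parameters ('this' counts)
//     X operator++(char);         // error: postfix parameter must be 'int'
//   };
//   bool operator&&(int, int);    // error: needs a class or enum parameter
//   X operator+(X, X = X());      // error: operators cannot have default args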
static bool
checkLiteralOperatorTemplateParameterList(Sema &SemaRef,
FunctionTemplateDecl *TpDecl) {
TemplateParameterList *TemplateParams = TpDecl->getTemplateParameters();
// Must have one or two template parameters.
if (TemplateParams->size() == 1) {
NonTypeTemplateParmDecl *PmDecl =
dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(0));
// The template parameter must be a char parameter pack.
if (PmDecl && PmDecl->isTemplateParameterPack() &&
SemaRef.Context.hasSameType(PmDecl->getType(), SemaRef.Context.CharTy))
return false;
} else if (TemplateParams->size() == 2) {
TemplateTypeParmDecl *PmType =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->getParam(0));
NonTypeTemplateParmDecl *PmArgs =
dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(1));
// The second template parameter must be a parameter pack with the
// first template parameter as its type.
if (PmType && PmArgs && !PmType->isTemplateParameterPack() &&
PmArgs->isTemplateParameterPack()) {
const TemplateTypeParmType *TArgs =
PmArgs->getType()->getAs<TemplateTypeParmType>();
if (TArgs && TArgs->getDepth() == PmType->getDepth() &&
TArgs->getIndex() == PmType->getIndex()) {
if (!SemaRef.inTemplateInstantiation())
SemaRef.Diag(TpDecl->getLocation(),
diag::ext_string_literal_operator_template);
return false;
}
}
}
SemaRef.Diag(TpDecl->getTemplateParameters()->getSourceRange().getBegin(),
diag::err_literal_operator_template)
<< TpDecl->getTemplateParameters()->getSourceRange();
return true;
}
/// CheckLiteralOperatorDeclaration - Check whether the declaration
/// of this literal operator function is well-formed. If so, returns
/// false; otherwise, emits appropriate diagnostics and returns true.
bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
if (isa<CXXMethodDecl>(FnDecl)) {
Diag(FnDecl->getLocation(), diag::err_literal_operator_outside_namespace)
<< FnDecl->getDeclName();
return true;
}
if (FnDecl->isExternC()) {
Diag(FnDecl->getLocation(), diag::err_literal_operator_extern_c);
if (const LinkageSpecDecl *LSD =
FnDecl->getDeclContext()->getExternCContext())
Diag(LSD->getExternLoc(), diag::note_extern_c_begins_here);
return true;
}
// This might be the definition of a literal operator template.
FunctionTemplateDecl *TpDecl = FnDecl->getDescribedFunctionTemplate();
// This might be a specialization of a literal operator template.
if (!TpDecl)
TpDecl = FnDecl->getPrimaryTemplate();
// template <char...> type operator "" name() and
// template <class T, T...> type operator "" name() are the only valid
// template signatures, and the only valid signatures with no parameters.
if (TpDecl) {
if (FnDecl->param_size() != 0) {
Diag(FnDecl->getLocation(),
diag::err_literal_operator_template_with_params);
return true;
}
if (checkLiteralOperatorTemplateParameterList(*this, TpDecl))
return true;
} else if (FnDecl->param_size() == 1) {
const ParmVarDecl *Param = FnDecl->getParamDecl(0);
QualType ParamType = Param->getType().getUnqualifiedType();
// Only unsigned long long int, long double, any character type, and const
// char * are allowed as the only parameters.
if (ParamType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) ||
Context.hasSameType(ParamType, Context.CharTy) ||
Context.hasSameType(ParamType, Context.WideCharTy) ||
Context.hasSameType(ParamType, Context.Char16Ty) ||
Context.hasSameType(ParamType, Context.Char32Ty)) {
} else if (const PointerType *Ptr = ParamType->getAs<PointerType>()) {
QualType InnerType = Ptr->getPointeeType();
// Pointer parameter must be a const char *.
if (!(Context.hasSameType(InnerType.getUnqualifiedType(),
Context.CharTy) &&
InnerType.isConstQualified() && !InnerType.isVolatileQualified())) {
Diag(Param->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< ParamType << "'const char *'" << Param->getSourceRange();
return true;
}
} else if (ParamType->isRealFloatingType()) {
Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
<< ParamType << Context.LongDoubleTy << Param->getSourceRange();
return true;
} else if (ParamType->isIntegerType()) {
Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
<< ParamType << Context.UnsignedLongLongTy << Param->getSourceRange();
return true;
} else {
Diag(Param->getSourceRange().getBegin(),
diag::err_literal_operator_invalid_param)
<< ParamType << Param->getSourceRange();
return true;
}
} else if (FnDecl->param_size() == 2) {
FunctionDecl::param_iterator Param = FnDecl->param_begin();
// First, verify that the first parameter is correct.
QualType FirstParamType = (*Param)->getType().getUnqualifiedType();
// A two-parameter function must have a pointer to const as its
// first parameter; let's strip those qualifiers.
const PointerType *PT = FirstParamType->getAs<PointerType>();
if (!PT) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
QualType PointeeType = PT->getPointeeType();
// The pointee type must be const-qualified (and not volatile).
if (!PointeeType.isConstQualified() || PointeeType.isVolatileQualified()) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
QualType InnerType = PointeeType.getUnqualifiedType();
// Only const char *, const wchar_t*, const char16_t*, and const char32_t*
// are allowed as the first parameter to a two-parameter function
if (!(Context.hasSameType(InnerType, Context.CharTy) ||
Context.hasSameType(InnerType, Context.WideCharTy) ||
Context.hasSameType(InnerType, Context.Char16Ty) ||
Context.hasSameType(InnerType, Context.Char32Ty))) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
// Move on to the second and final parameter.
++Param;
// The second parameter must be a std::size_t.
QualType SecondParamType = (*Param)->getType().getUnqualifiedType();
if (!Context.hasSameType(SecondParamType, Context.getSizeType())) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< SecondParamType << Context.getSizeType()
<< (*Param)->getSourceRange();
return true;
}
} else {
Diag(FnDecl->getLocation(), diag::err_literal_operator_bad_param_count);
return true;
}
// Parameters are good.
// A parameter-declaration-clause containing a default argument is not
// equivalent to any of the permitted forms.
for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg()) {
Diag(Param->getDefaultArgRange().getBegin(),
diag::err_literal_operator_default_argument)
<< Param->getDefaultArgRange();
break;
}
}
StringRef LiteralName
= FnDecl->getDeclName().getCXXLiteralIdentifier()->getName();
if (LiteralName[0] != '_') {
// C++11 [usrlit.suffix]p1:
// Literal suffix identifiers that do not start with an underscore
// are reserved for future standardization.
Diag(FnDecl->getLocation(), diag::warn_user_literal_reserved)
<< StringLiteralParser::isValidUDSuffix(getLangOpts(), LiteralName);
}
return false;
}
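// A brief sketch of forms the checks above accept and reject (the suffix
// names here are illustrative):
//   #include <cstddef>
//   unsigned long long operator"" _kb(unsigned long long v) { return v * 1024; }
//   std::size_t operator"" _len(const char *, std::size_t n) { return n; }
//   int operator"" _bad(int);      // error: integer form takes unsigned long long
//   float operator"" _fl(float);   // error: floating form takes long double
//   unsigned long long operator"" kb(unsigned long long);  // warning: suffix
//                                  // without a leading '_' is reserved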
/// ActOnStartLinkageSpecification - Parsed the beginning of a C++
/// linkage specification, including the language and (if present)
/// the '{'. ExternLoc is the location of the 'extern', Lang is the
/// language string literal. LBraceLoc, if valid, provides the location of
/// the '{' brace. Otherwise, this linkage specification does not
/// have any braces.
Decl *Sema::ActOnStartLinkageSpecification(Scope *S, SourceLocation ExternLoc,
Expr *LangStr,
SourceLocation LBraceLoc) {
StringLiteral *Lit = cast<StringLiteral>(LangStr);
if (!Lit->isAscii()) {
Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii)
<< LangStr->getSourceRange();
return nullptr;
}
StringRef Lang = Lit->getString();
LinkageSpecDecl::LanguageIDs Language;
if (Lang == "C")
Language = LinkageSpecDecl::lang_c;
else if (Lang == "C++")
Language = LinkageSpecDecl::lang_cxx;
else {
Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_unknown)
<< LangStr->getSourceRange();
return nullptr;
}
// FIXME: Add all the various semantics of linkage specifications
LinkageSpecDecl *D = LinkageSpecDecl::Create(Context, CurContext, ExternLoc,
LangStr->getExprLoc(), Language,
LBraceLoc.isValid());
CurContext->addDecl(D);
PushDeclContext(S, D);
return D;
}
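// A minimal sketch of the linkage strings recognized above (the declarations
// are illustrative):
//   extern "C" void c_func(int);       // lang_c
//   extern "C++" { void cxx_func(); }  // lang_cxx, braced form
//   extern "D" void d_func();          // error: unknown linkage language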
/// ActOnFinishLinkageSpecification - Complete the definition of
/// the C++ linkage specification LinkageSpec. If RBraceLoc is
/// valid, it's the position of the closing '}' brace in a linkage
/// specification that uses braces.
Decl *Sema::ActOnFinishLinkageSpecification(Scope *S,
Decl *LinkageSpec,
SourceLocation RBraceLoc) {
if (RBraceLoc.isValid()) {
LinkageSpecDecl* LSDecl = cast<LinkageSpecDecl>(LinkageSpec);
LSDecl->setRBraceLoc(RBraceLoc);
}
PopDeclContext();
return LinkageSpec;
}
Decl *Sema::ActOnEmptyDeclaration(Scope *S,
AttributeList *AttrList,
SourceLocation SemiLoc) {
Decl *ED = EmptyDecl::Create(Context, CurContext, SemiLoc);
// Attribute declarations appertain to empty declaration so we handle
// them here.
if (AttrList)
ProcessDeclAttributeList(S, ED, AttrList);
CurContext->addDecl(ED);
return ED;
}
/// \brief Perform semantic analysis for the variable declaration that
/// occurs within a C++ catch clause, returning the newly-created
/// variable.
VarDecl *Sema::BuildExceptionDeclaration(Scope *S,
TypeSourceInfo *TInfo,
SourceLocation StartLoc,
SourceLocation Loc,
IdentifierInfo *Name) {
bool Invalid = false;
QualType ExDeclType = TInfo->getType();
// Arrays and functions decay.
if (ExDeclType->isArrayType())
ExDeclType = Context.getArrayDecayedType(ExDeclType);
else if (ExDeclType->isFunctionType())
ExDeclType = Context.getPointerType(ExDeclType);
// C++ 15.3p1: The exception-declaration shall not denote an incomplete type.
// The exception-declaration shall not denote a pointer or reference to an
// incomplete type, other than [cv] void*.
// N2844 forbids rvalue references.
if (!ExDeclType->isDependentType() && ExDeclType->isRValueReferenceType()) {
Diag(Loc, diag::err_catch_rvalue_ref);
Invalid = true;
}
if (ExDeclType->isVariablyModifiedType()) {
Diag(Loc, diag::err_catch_variably_modified) << ExDeclType;
Invalid = true;
}
QualType BaseType = ExDeclType;
int Mode = 0; // 0 for direct type, 1 for pointer, 2 for reference
unsigned DK = diag::err_catch_incomplete;
if (const PointerType *Ptr = BaseType->getAs<PointerType>()) {
BaseType = Ptr->getPointeeType();
Mode = 1;
DK = diag::err_catch_incomplete_ptr;
} else if (const ReferenceType *Ref = BaseType->getAs<ReferenceType>()) {
// For the purpose of error recovery, we treat rvalue refs like lvalue refs.
BaseType = Ref->getPointeeType();
Mode = 2;
DK = diag::err_catch_incomplete_ref;
}
if (!Invalid && (Mode == 0 || !BaseType->isVoidType()) &&
!BaseType->isDependentType() && RequireCompleteType(Loc, BaseType, DK))
Invalid = true;
if (!Invalid && !ExDeclType->isDependentType() &&
RequireNonAbstractType(Loc, ExDeclType,
diag::err_abstract_type_in_decl,
AbstractVariableType))
Invalid = true;
// Only the non-fragile NeXT runtime currently supports C++ catches
// of ObjC types, and no runtime supports catching ObjC types by value.
if (!Invalid && getLangOpts().ObjC1) {
QualType T = ExDeclType;
if (const ReferenceType *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
if (T->isObjCObjectType()) {
Diag(Loc, diag::err_objc_object_catch);
Invalid = true;
} else if (T->isObjCObjectPointerType()) {
// FIXME: should this be a test for macosx-fragile specifically?
if (getLangOpts().ObjCRuntime.isFragile())
Diag(Loc, diag::warn_objc_pointer_cxx_catch_fragile);
}
}
VarDecl *ExDecl = VarDecl::Create(Context, CurContext, StartLoc, Loc, Name,
ExDeclType, TInfo, SC_None);
ExDecl->setExceptionVariable(true);
// In ARC, infer 'retaining' for variables of retainable type.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(ExDecl))
Invalid = true;
if (!Invalid && !ExDeclType->isDependentType()) {
if (const RecordType *recordType = ExDeclType->getAs<RecordType>()) {
// Insulate this from anything else we might currently be parsing.
EnterExpressionEvaluationContext scope(
*this, ExpressionEvaluationContext::PotentiallyEvaluated);
// C++ [except.handle]p16:
// The object declared in an exception-declaration or, if the
// exception-declaration does not specify a name, a temporary (12.2) is
// copy-initialized (8.5) from the exception object. [...]
// The object is destroyed when the handler exits, after the destruction
// of any automatic objects initialized within the handler.
//
// We just pretend to initialize the object with itself, then make sure
// it can be destroyed later.
QualType initType = Context.getExceptionObjectType(ExDeclType);
InitializedEntity entity =
InitializedEntity::InitializeVariable(ExDecl);
InitializationKind initKind =
InitializationKind::CreateCopy(Loc, SourceLocation());
Expr *opaqueValue =
new (Context) OpaqueValueExpr(Loc, initType, VK_LValue, OK_Ordinary);
InitializationSequence sequence(*this, entity, initKind, opaqueValue);
ExprResult result = sequence.Perform(*this, entity, initKind, opaqueValue);
if (result.isInvalid())
Invalid = true;
else {
// If the constructor used was non-trivial, set this as the
// "initializer".
CXXConstructExpr *construct = result.getAs<CXXConstructExpr>();
if (!construct->getConstructor()->isTrivial()) {
Expr *init = MaybeCreateExprWithCleanups(construct);
ExDecl->setInit(init);
}
// And make sure it's destructible.
FinalizeVarWithDestructor(ExDecl, recordType);
}
}
}
if (Invalid)
ExDecl->setInvalidDecl();
return ExDecl;
}
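// A minimal sketch of handlers the checks above reject or allow (the types
// are illustrative):
//   struct Incomplete;
//   void handlers() {
//     try { } catch (int &&r) { }        // error: rvalue reference (N2844)
//     try { } catch (Incomplete *p) { }  // error: pointer to incomplete type
//     try { } catch (void *p) { }        // OK: cv void* is explicitly allowed
//   }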
/// ActOnExceptionDeclarator - Parsed the exception-declarator in a C++ catch
/// handler.
Decl *Sema::ActOnExceptionDeclarator(Scope *S, Declarator &D) {
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
bool Invalid = D.isInvalidType();
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_ExceptionType)) {
TInfo = Context.getTrivialTypeSourceInfo(Context.IntTy,
D.getIdentifierLoc());
Invalid = true;
}
IdentifierInfo *II = D.getIdentifier();
if (NamedDecl *PrevDecl = LookupSingleName(S, II, D.getIdentifierLoc(),
LookupOrdinaryName,
ForRedeclaration)) {
// The scope should be freshly made just for us. There is just no way
// it contains any previous declaration, except for function parameters in
// a function-try-block's catch statement.
assert(!S->isDeclScope(PrevDecl));
if (isDeclInScope(PrevDecl, CurContext, S)) {
Diag(D.getIdentifierLoc(), diag::err_redefinition)
<< D.getIdentifier();
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
Invalid = true;
} else if (PrevDecl->isTemplateParameter())
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
}
if (D.getCXXScopeSpec().isSet() && !Invalid) {
Diag(D.getIdentifierLoc(), diag::err_qualified_catch_declarator)
<< D.getCXXScopeSpec().getRange();
Invalid = true;
}
VarDecl *ExDecl = BuildExceptionDeclaration(S, TInfo,
D.getLocStart(),
D.getIdentifierLoc(),
D.getIdentifier());
if (Invalid)
ExDecl->setInvalidDecl();
// Add the exception declaration into this scope.
if (II)
PushOnScopeChains(ExDecl, S);
else
CurContext->addDecl(ExDecl);
ProcessDeclAttributes(S, ExDecl, D);
return ExDecl;
}
Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
Expr *AssertExpr,
Expr *AssertMessageExpr,
SourceLocation RParenLoc) {
StringLiteral *AssertMessage =
AssertMessageExpr ? cast<StringLiteral>(AssertMessageExpr) : nullptr;
if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
return nullptr;
return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
AssertMessage, RParenLoc, false);
}
Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc,
Expr *AssertExpr,
StringLiteral *AssertMessage,
SourceLocation RParenLoc,
bool Failed) {
assert(AssertExpr != nullptr && "Expected non-null condition");
if (!AssertExpr->isTypeDependent() && !AssertExpr->isValueDependent() &&
!Failed) {
// In a static_assert-declaration, the constant-expression shall be a
// constant expression that can be contextually converted to bool.
ExprResult Converted = PerformContextuallyConvertToBool(AssertExpr);
if (Converted.isInvalid())
Failed = true;
llvm::APSInt Cond;
if (!Failed && VerifyIntegerConstantExpression(Converted.get(), &Cond,
diag::err_static_assert_expression_is_not_constant,
/*AllowFold=*/false).isInvalid())
Failed = true;
if (!Failed && !Cond) {
SmallString<256> MsgBuffer;
llvm::raw_svector_ostream Msg(MsgBuffer);
if (AssertMessage)
AssertMessage->printPretty(Msg, nullptr, getPrintingPolicy());
Diag(StaticAssertLoc, diag::err_static_assert_failed)
<< !AssertMessage << Msg.str() << AssertExpr->getSourceRange();
Failed = true;
}
}
ExprResult FullAssertExpr = ActOnFinishFullExpr(AssertExpr, StaticAssertLoc,
/*DiscardedValue*/false,
/*IsConstexpr*/true);
if (FullAssertExpr.isInvalid())
Failed = true;
else
AssertExpr = FullAssertExpr.get();
Decl *Decl = StaticAssertDecl::Create(Context, CurContext, StaticAssertLoc,
AssertExpr, AssertMessage, RParenLoc,
Failed);
CurContext->addDecl(Decl);
return Decl;
}
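// A short sketch of the evaluation above (the conditions are illustrative):
//   static_assert(sizeof(void *) >= 4, "pointers too small");  // OK if true
//   static_assert(2 + 2 == 5, "bad arithmetic");  // error: static_assert
//                                                 // failed "bad arithmetic"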
/// \brief Perform semantic analysis of the given friend type declaration.
///
/// \returns The friend declaration created for the given type.
FriendDecl *Sema::CheckFriendTypeDecl(SourceLocation LocStart,
SourceLocation FriendLoc,
TypeSourceInfo *TSInfo) {
assert(TSInfo && "NULL TypeSourceInfo for friend type declaration");
QualType T = TSInfo->getType();
SourceRange TypeRange = TSInfo->getTypeLoc().getLocalSourceRange();
// C++03 [class.friend]p2:
// An elaborated-type-specifier shall be used in a friend declaration
// for a class.*
//
// * The class-key of the elaborated-type-specifier is required.
if (!CodeSynthesisContexts.empty()) {
// Do not complain about the form of friend template types during any kind
// of code synthesis. For template instantiation, we will have complained
// when the template was defined.
} else {
if (!T->isElaboratedTypeSpecifier()) {
// If we evaluated the type to a record type, suggest putting
// a tag in front.
if (const RecordType *RT = T->getAs<RecordType>()) {
RecordDecl *RD = RT->getDecl();
SmallString<16> InsertionText(" ");
InsertionText += RD->getKindName();
Diag(TypeRange.getBegin(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_unelaborated_friend_type :
diag::ext_unelaborated_friend_type)
<< (unsigned) RD->getTagKind()
<< T
<< FixItHint::CreateInsertion(getLocForEndOfToken(FriendLoc),
InsertionText);
} else {
Diag(FriendLoc,
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_nonclass_type_friend :
diag::ext_nonclass_type_friend)
<< T
<< TypeRange;
}
} else if (T->getAs<EnumType>()) {
Diag(FriendLoc,
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_enum_friend :
diag::ext_enum_friend)
<< T
<< TypeRange;
}
// C++11 [class.friend]p3:
// A friend declaration that does not declare a function shall have one
// of the following forms:
// friend elaborated-type-specifier ;
// friend simple-type-specifier ;
// friend typename-specifier ;
if (getLangOpts().CPlusPlus11 && LocStart != FriendLoc)
Diag(FriendLoc, diag::err_friend_not_first_in_declaration) << T;
}
// If the type specifier in a friend declaration designates a (possibly
// cv-qualified) class type, that class is declared as a friend; otherwise,
// the friend declaration is ignored.
return FriendDecl::Create(Context, CurContext,
TSInfo->getTypeLoc().getLocStart(), TSInfo,
FriendLoc);
}
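// A rough sketch of friend type declarations as classified above (the names
// are illustrative):
//   class C; enum E : int { };
//   struct S {
//     friend class C;  // OK: elaborated-type-specifier
//     friend C;        // C++98 extension, fix-it inserts 'class'; OK in C++11
//     friend enum E;   // C++98 extension; OK in C++11
//   };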
/// Handle a friend tag declaration where the scope specifier was
/// templated.
Decl *Sema::ActOnTemplatedFriendTag(Scope *S, SourceLocation FriendLoc,
unsigned TagSpec, SourceLocation TagLoc,
CXXScopeSpec &SS,
IdentifierInfo *Name,
SourceLocation NameLoc,
AttributeList *Attr,
MultiTemplateParamsArg TempParamLists) {
TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForTypeSpec(TagSpec);
bool IsMemberSpecialization = false;
bool Invalid = false;
if (TemplateParameterList *TemplateParams =
MatchTemplateParametersToScopeSpecifier(
TagLoc, NameLoc, SS, nullptr, TempParamLists, /*friend*/ true,
IsMemberSpecialization, Invalid)) {
if (TemplateParams->size() > 0) {
// This is a declaration of a class template.
if (Invalid)
return nullptr;
return CheckClassTemplate(S, TagSpec, TUK_Friend, TagLoc, SS, Name,
NameLoc, Attr, TemplateParams, AS_public,
/*ModulePrivateLoc=*/SourceLocation(),
FriendLoc, TempParamLists.size() - 1,
TempParamLists.data()).get();
} else {
// The "template<>" header is extraneous.
Diag(TemplateParams->getTemplateLoc(), diag::err_template_tag_noparams)
<< TypeWithKeyword::getTagTypeKindName(Kind) << Name;
IsMemberSpecialization = true;
}
}
if (Invalid) return nullptr;
bool isAllExplicitSpecializations = true;
for (unsigned I = TempParamLists.size(); I-- > 0; ) {
if (TempParamLists[I]->size()) {
isAllExplicitSpecializations = false;
break;
}
}
// FIXME: don't ignore attributes.
// If it's explicit specializations all the way down, just forget
// about the template header and build an appropriate non-templated
// friend. TODO: for source fidelity, remember the headers.
if (isAllExplicitSpecializations) {
if (SS.isEmpty()) {
bool Owned = false;
bool IsDependent = false;
return ActOnTag(S, TagSpec, TUK_Friend, TagLoc, SS, Name, NameLoc,
Attr, AS_public,
/*ModulePrivateLoc=*/SourceLocation(),
MultiTemplateParamsArg(), Owned, IsDependent,
/*ScopedEnumKWLoc=*/SourceLocation(),
/*ScopedEnumUsesClassTag=*/false,
/*UnderlyingType=*/TypeResult(),
/*IsTypeSpecifier=*/false,
/*IsTemplateParamOrArg=*/false);
}
NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context);
ElaboratedTypeKeyword Keyword
= TypeWithKeyword::getKeywordForTagTypeKind(Kind);
QualType T = CheckTypenameType(Keyword, TagLoc, QualifierLoc,
*Name, NameLoc);
if (T.isNull())
return nullptr;
TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T);
if (isa<DependentNameType>(T)) {
DependentNameTypeLoc TL =
TSI->getTypeLoc().castAs<DependentNameTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(QualifierLoc);
TL.setNameLoc(NameLoc);
} else {
ElaboratedTypeLoc TL = TSI->getTypeLoc().castAs<ElaboratedTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(QualifierLoc);
TL.getNamedTypeLoc().castAs<TypeSpecTypeLoc>().setNameLoc(NameLoc);
}
FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc,
TSI, FriendLoc, TempParamLists);
Friend->setAccess(AS_public);
CurContext->addDecl(Friend);
return Friend;
}
assert(SS.isNotEmpty() && "valid templated tag with no SS and no direct?");
// Handle the case of a templated-scope friend class. e.g.
// template <class T> class A<T>::B;
// FIXME: we don't support these right now.
Diag(NameLoc, diag::warn_template_qualified_friend_unsupported)
<< SS.getScopeRep() << SS.getRange() << cast<CXXRecordDecl>(CurContext);
ElaboratedTypeKeyword ETK = TypeWithKeyword::getKeywordForTagTypeKind(Kind);
QualType T = Context.getDependentNameType(ETK, SS.getScopeRep(), Name);
TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T);
DependentNameTypeLoc TL = TSI->getTypeLoc().castAs<DependentNameTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(SS.getWithLocInContext(Context));
TL.setNameLoc(NameLoc);
FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc,
TSI, FriendLoc, TempParamLists);
Friend->setAccess(AS_public);
Friend->setUnsupportedFriend(true);
CurContext->addDecl(Friend);
return Friend;
}
/// Handle a friend type declaration. This works in tandem with
/// ActOnTag.
///
/// Notes on friend class templates:
///
/// We generally treat friend class declarations as if they were
/// declaring a class. So, for example, the elaborated type specifier
/// in a friend declaration is required to obey the restrictions of a
/// class-head (i.e. no typedefs in the scope chain), template
/// parameters are required to match up with simple template-ids, &c.
/// However, unlike when declaring a template specialization, it's
/// okay to refer to a template specialization without an empty
/// template parameter declaration, e.g.
/// friend class A<T>::B<unsigned>;
/// We permit this as a special case; if there are any template
/// parameters present at all, require proper matching, i.e.
/// template <> template \<class T> friend class A<int>::B;
Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS,
MultiTemplateParamsArg TempParams) {
SourceLocation Loc = DS.getLocStart();
assert(DS.isFriendSpecified());
assert(DS.getStorageClassSpec() == DeclSpec::SCS_unspecified);
// Try to convert the decl specifier to a type. This works for
// friend templates because ActOnTag never produces a ClassTemplateDecl
// for a TUK_Friend.
Declarator TheDeclarator(DS, Declarator::MemberContext);
TypeSourceInfo *TSI = GetTypeForDeclarator(TheDeclarator, S);
QualType T = TSI->getType();
if (TheDeclarator.isInvalidType())
return nullptr;
if (DiagnoseUnexpandedParameterPack(Loc, TSI, UPPC_FriendDeclaration))
return nullptr;
// This is definitely an error in C++98. It's probably meant to
// be forbidden in C++0x, too, but the specification is just
// poorly written.
//
// The problem is with declarations like the following:
// template <T> friend A<T>::foo;
// where deciding whether a class C is a friend or not now hinges
// on whether there exists an instantiation of A that causes
// 'foo' to equal C. There are restrictions on class-heads
// (which we declare (by fiat) elaborated friend declarations to
// be) that makes this tractable.
//
// FIXME: handle "template <> friend class A<T>;", which
// is possibly well-formed? Who even knows?
if (TempParams.size() && !T->isElaboratedTypeSpecifier()) {
Diag(Loc, diag::err_tagless_friend_type_template)
<< DS.getSourceRange();
return nullptr;
}
// C++98 [class.friend]p1: A friend of a class is a function
// or class that is not a member of the class . . .
// This is fixed in DR77, which just barely didn't make the C++03
// deadline. It's also a very silly restriction that seriously
// affects inner classes and which nobody else seems to implement;
// thus we never diagnose it, not even in -pedantic.
//
// But note that we could warn about it: it's always useless to
// friend one of your own members (it's not, however, worthless to
// friend a member of an arbitrary specialization of your template).
Decl *D;
if (!TempParams.empty())
D = FriendTemplateDecl::Create(Context, CurContext, Loc,
TempParams,
TSI,
DS.getFriendSpecLoc());
else
D = CheckFriendTypeDecl(Loc, DS.getFriendSpecLoc(), TSI);
if (!D)
return nullptr;
D->setAccess(AS_public);
CurContext->addDecl(D);
return D;
}
NamedDecl *Sema::ActOnFriendFunctionDecl(Scope *S, Declarator &D,
MultiTemplateParamsArg TemplateParams) {
const DeclSpec &DS = D.getDeclSpec();
assert(DS.isFriendSpecified());
assert(DS.getStorageClassSpec() == DeclSpec::SCS_unspecified);
SourceLocation Loc = D.getIdentifierLoc();
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
// C++ [class.friend]p1
// A friend of a class is a function or class....
// Note that this sees through typedefs, which is intended.
// It *doesn't* see through dependent types, which is correct
// according to [temp.arg.type]p3:
// If a declaration acquires a function type through a
// type dependent on a template-parameter and this causes
// a declaration that does not use the syntactic form of a
// function declarator to have a function type, the program
// is ill-formed.
if (!TInfo->getType()->isFunctionType()) {
Diag(Loc, diag::err_unexpected_friend);
// It might be worthwhile to try to recover by creating an
// appropriate declaration.
return nullptr;
}
// C++ [namespace.memdef]p3
// - If a friend declaration in a non-local class first declares a
// class or function, the friend class or function is a member
// of the innermost enclosing namespace.
// - The name of the friend is not found by simple name lookup
// until a matching declaration is provided in that namespace
// scope (either before or after the class declaration granting
// friendship).
// - If a friend function is called, its name may be found by the
// name lookup that considers functions from namespaces and
// classes associated with the types of the function arguments.
// - When looking for a prior declaration of a class or a function
// declared as a friend, scopes outside the innermost enclosing
// namespace scope are not considered.
CXXScopeSpec &SS = D.getCXXScopeSpec();
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
assert(Name);
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(Loc, TInfo, UPPC_FriendDeclaration) ||
DiagnoseUnexpandedParameterPack(NameInfo, UPPC_FriendDeclaration) ||
DiagnoseUnexpandedParameterPack(SS, UPPC_FriendDeclaration))
return nullptr;
// The context we found the declaration in, or in which we should
// create the declaration.
DeclContext *DC;
Scope *DCScope = S;
LookupResult Previous(*this, NameInfo, LookupOrdinaryName,
ForRedeclaration);
// There are five cases here.
// - There's no scope specifier and we're in a local class. Only look
// for functions declared in the immediately-enclosing block scope.
// We recover from invalid scope qualifiers as if they just weren't there.
FunctionDecl *FunctionContainingLocalClass = nullptr;
if ((SS.isInvalid() || !SS.isSet()) &&
(FunctionContainingLocalClass =
cast<CXXRecordDecl>(CurContext)->isLocalClass())) {
// C++11 [class.friend]p11:
// If a friend declaration appears in a local class and the name
// specified is an unqualified name, a prior declaration is
// looked up without considering scopes that are outside the
// innermost enclosing non-class scope. For a friend function
// declaration, if there is no prior declaration, the program is
// ill-formed.
// Find the innermost enclosing non-class scope. This is the block
// scope containing the local class definition (or for a nested class,
// the outer local class).
DCScope = S->getFnParent();
// Look up the function name in the scope.
Previous.clear(LookupLocalFriendName);
LookupName(Previous, S, /*AllowBuiltinCreation*/false);
if (!Previous.empty()) {
// All possible previous declarations must have the same context:
// either they were declared at block scope or they are members of
// one of the enclosing local classes.
DC = Previous.getRepresentativeDecl()->getDeclContext();
} else {
// This is ill-formed, but provide the context that we would have
// declared the function in, if we were permitted to, for error recovery.
DC = FunctionContainingLocalClass;
}
adjustContextForLocalExternDecl(DC);
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
if (D.isFunctionDefinition()) {
Diag(NameInfo.getBeginLoc(), diag::err_friend_def_in_local_class);
}
// - There's no scope specifier, in which case we just go to the
// appropriate scope and look for a function or function template
// there as appropriate.
} else if (SS.isInvalid() || !SS.isSet()) {
// C++11 [namespace.memdef]p3:
// If the name in a friend declaration is neither qualified nor
// a template-id and the declaration is a function or an
// elaborated-type-specifier, the lookup to determine whether
// the entity has been previously declared shall not consider
// any scopes outside the innermost enclosing namespace.
bool isTemplateId = D.getName().getKind() == UnqualifiedId::IK_TemplateId;
// Find the appropriate context according to the above.
DC = CurContext;
// Skip class contexts. If someone can cite chapter and verse
// for this behavior, that would be nice --- it's what GCC and
// EDG do, and it seems like a reasonable intent, but the spec
// really only says that checks for unqualified existing
// declarations should stop at the nearest enclosing namespace,
// not that they should only consider the nearest enclosing
// namespace.
while (DC->isRecord())
DC = DC->getParent();
DeclContext *LookupDC = DC;
while (LookupDC->isTransparentContext())
LookupDC = LookupDC->getParent();
while (true) {
LookupQualifiedName(Previous, LookupDC);
if (!Previous.empty()) {
DC = LookupDC;
break;
}
if (isTemplateId) {
if (isa<TranslationUnitDecl>(LookupDC)) break;
} else {
if (LookupDC->isFileContext()) break;
}
LookupDC = LookupDC->getParent();
}
DCScope = getScopeForDeclContext(S, DC);
// - There's a non-dependent scope specifier, in which case we
// compute it and do a previous lookup there for a function
// or function template.
} else if (!SS.getScopeRep()->isDependent()) {
DC = computeDeclContext(SS);
if (!DC) return nullptr;
if (RequireCompleteDeclContext(SS, DC)) return nullptr;
LookupQualifiedName(Previous, DC);
// Ignore things found implicitly in the wrong scope.
// TODO: better diagnostics for this case. Suggesting the right
// qualified scope would be nice...
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (!DC->InEnclosingNamespaceSetOf(
D->getDeclContext()->getRedeclContext()))
F.erase();
}
F.done();
if (Previous.empty()) {
D.setInvalidType();
Diag(Loc, diag::err_qualified_friend_not_found)
<< Name << TInfo->getType();
return nullptr;
}
// C++ [class.friend]p1: A friend of a class is a function or
// class that is not a member of the class . . .
if (DC->Equals(CurContext))
Diag(DS.getFriendSpecLoc(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_friend_is_member :
diag::err_friend_is_member);
if (D.isFunctionDefinition()) {
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
SemaDiagnosticBuilder DB
= Diag(SS.getRange().getBegin(), diag::err_qualified_friend_def);
DB << SS.getScopeRep();
if (DC->isFileContext())
DB << FixItHint::CreateRemoval(SS.getRange());
SS.clear();
}
// - There's a scope specifier that does not match any template
// parameter lists, in which case we use some arbitrary context,
// create a method or method template, and wait for instantiation.
// - There's a scope specifier that does match some template
// parameter lists, which we don't handle right now.
} else {
if (D.isFunctionDefinition()) {
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
Diag(SS.getRange().getBegin(), diag::err_qualified_friend_def)
<< SS.getScopeRep();
}
DC = CurContext;
assert(isa<CXXRecordDecl>(DC) && "friend declaration not in class?");
}
if (!DC->isRecord()) {
int DiagArg = -1;
switch (D.getName().getKind()) {
case UnqualifiedId::IK_ConstructorTemplateId:
case UnqualifiedId::IK_ConstructorName:
DiagArg = 0;
break;
case UnqualifiedId::IK_DestructorName:
DiagArg = 1;
break;
case UnqualifiedId::IK_ConversionFunctionId:
DiagArg = 2;
break;
case UnqualifiedId::IK_DeductionGuideName:
DiagArg = 3;
break;
case UnqualifiedId::IK_Identifier:
case UnqualifiedId::IK_ImplicitSelfParam:
case UnqualifiedId::IK_LiteralOperatorId:
case UnqualifiedId::IK_OperatorFunctionId:
case UnqualifiedId::IK_TemplateId:
break;
}
// This implies that it has to be an operator or function.
if (DiagArg >= 0) {
Diag(Loc, diag::err_introducing_special_friend) << DiagArg;
return nullptr;
}
}
// FIXME: This is an egregious hack to cope with cases where the scope stack
// does not contain the declaration context, i.e., in an out-of-line
// definition of a class.
Scope FakeDCScope(S, Scope::DeclScope, Diags);
if (!DCScope) {
FakeDCScope.setEntity(DC);
DCScope = &FakeDCScope;
}
bool AddToScope = true;
NamedDecl *ND = ActOnFunctionDeclarator(DCScope, D, DC, TInfo, Previous,
TemplateParams, AddToScope);
if (!ND) return nullptr;
assert(ND->getLexicalDeclContext() == CurContext);
// If we performed typo correction, we might have added a scope specifier
// and changed the decl context.
DC = ND->getDeclContext();
// Add the function declaration to the appropriate lookup tables,
// adjusting the redeclarations list as necessary. We don't
// want to do this yet if the friending class is dependent.
//
// Also update the scope-based lookup if the target context's
// lookup context is in lexical scope.
if (!CurContext->isDependentContext()) {
DC = DC->getRedeclContext();
DC->makeDeclVisibleInContext(ND);
if (Scope *EnclosingScope = getScopeForDeclContext(S, DC))
PushOnScopeChains(ND, EnclosingScope, /*AddToContext=*/ false);
}
FriendDecl *FrD = FriendDecl::Create(Context, CurContext,
D.getIdentifierLoc(), ND,
DS.getFriendSpecLoc());
FrD->setAccess(AS_public);
CurContext->addDecl(FrD);
if (ND->isInvalidDecl()) {
FrD->setInvalidDecl();
} else {
if (DC->isRecord()) CheckFriendAccess(ND);
FunctionDecl *FD;
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(ND))
FD = FTD->getTemplatedDecl();
else
FD = cast<FunctionDecl>(ND);
// C++11 [dcl.fct.default]p4: If a friend declaration specifies a
// default argument expression, that declaration shall be a definition
// and shall be the only declaration of the function or function
// template in the translation unit.
if (functionDeclHasDefaultArgument(FD)) {
// We can't look at FD->getPreviousDecl() because it may not have been set
// if we're in a dependent context. If the function is known to be a
// redeclaration, we will have narrowed Previous down to the right decl.
if (D.isRedeclaration()) {
Diag(FD->getLocation(), diag::err_friend_decl_with_def_arg_redeclared);
Diag(Previous.getRepresentativeDecl()->getLocation(),
diag::note_previous_declaration);
} else if (!D.isFunctionDefinition())
Diag(FD->getLocation(), diag::err_friend_decl_with_def_arg_must_be_def);
}
// Mark templated-scope function declarations as unsupported.
if (FD->getNumTemplateParameterLists() && SS.isValid()) {
Diag(FD->getLocation(), diag::warn_template_qualified_friend_unsupported)
<< SS.getScopeRep() << SS.getRange()
<< cast<CXXRecordDecl>(CurContext);
FrD->setUnsupportedFriend(true);
}
}
return ND;
}
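// A condensed sketch of the default-argument rule enforced above (the names
// are illustrative):
//   struct A {
//     friend void f(int = 0) { }  // OK: the friend declaration is a definition
//   };
//   struct B {
//     friend void g(int = 0);     // error: must be a definition and the only
//   };                            // declaration of 'g' in the translation unit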
void Sema::SetDeclDeleted(Decl *Dcl, SourceLocation DelLoc) {
AdjustDeclIfTemplate(Dcl);
FunctionDecl *Fn = dyn_cast_or_null<FunctionDecl>(Dcl);
if (!Fn) {
Diag(DelLoc, diag::err_deleted_non_function);
return;
}
// A deleted function does not have a body.
Fn->setWillHaveBody(false);
if (const FunctionDecl *Prev = Fn->getPreviousDecl()) {
// Don't consider the implicit declaration we generate for explicit
// specializations. FIXME: Do not generate these implicit declarations.
if ((Prev->getTemplateSpecializationKind() != TSK_ExplicitSpecialization ||
Prev->getPreviousDecl()) &&
!Prev->isDefined()) {
Diag(DelLoc, diag::err_deleted_decl_not_first);
Diag(Prev->getLocation().isInvalid() ? DelLoc : Prev->getLocation(),
Prev->isImplicit() ? diag::note_previous_implicit_declaration
: diag::note_previous_declaration);
}
// If the declaration wasn't the first, we delete the function anyway for
// recovery.
Fn = Fn->getCanonicalDecl();
}
// dllimport/dllexport cannot be deleted.
if (const InheritableAttr *DLLAttr = getDLLAttr(Fn)) {
Diag(Fn->getLocation(), diag::err_attribute_dll_deleted) << DLLAttr;
Fn->setInvalidDecl();
}
if (Fn->isDeleted())
return;
// See if we're deleting a function which is already known to override a
// non-deleted virtual function.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn)) {
bool IssuedDiagnostic = false;
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I) {
if (!(*I)->isDeleted()) {
if (!IssuedDiagnostic) {
Diag(DelLoc, diag::err_deleted_override) << MD->getDeclName();
IssuedDiagnostic = true;
}
Diag((*I)->getLocation(), diag::note_overridden_virtual_function);
}
}
// If this function was implicitly deleted because it was defaulted,
// explain why it was deleted.
if (IssuedDiagnostic && MD->isDefaulted())
ShouldDeleteSpecialMember(MD, getSpecialMember(MD), nullptr,
/*Diagnose*/true);
}
// C++11 [basic.start.main]p3:
// A program that defines main as deleted [...] is ill-formed.
if (Fn->isMain())
Diag(DelLoc, diag::err_deleted_main);
// C++11 [dcl.fct.def.delete]p4:
// A deleted function is implicitly inline.
Fn->setImplicitlyInline();
Fn->setDeletedAsWritten();
}
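// A minimal sketch of the rejections above (the names are illustrative):
//   struct Base { virtual void f(); };
//   struct Derived : Base {
//     void f() = delete;  // error: deleting a function that overrides a
//   };                    // non-deleted virtual function
//   void g();
//   void g() = delete;    // error: deleted definition must be first declaration
//   int main() = delete;  // error: 'main' is not allowed to be deleted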
void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) {
CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Dcl);
if (MD) {
if (MD->getParent()->isDependentType()) {
MD->setDefaulted();
MD->setExplicitlyDefaulted();
return;
}
CXXSpecialMember Member = getSpecialMember(MD);
if (Member == CXXInvalid) {
if (!MD->isInvalidDecl())
Diag(DefaultLoc, diag::err_default_special_members);
return;
}
MD->setDefaulted();
MD->setExplicitlyDefaulted();
// Unset that we will have a body for this function. We might not,
// if it turns out to be trivial, and we don't need this marking now
// that we've marked it as defaulted.
MD->setWillHaveBody(false);
// If this definition appears within the record, do the checking when
// the record is complete.
const FunctionDecl *Primary = MD;
if (const FunctionDecl *Pattern = MD->getTemplateInstantiationPattern())
// Ask the template instantiation pattern that actually had the
// '= default' on it.
Primary = Pattern;
// If the method was defaulted on its first declaration, we will have
// already performed the checking in CheckCompletedCXXClass. Such a
// declaration doesn't trigger an implicit definition.
if (Primary->getCanonicalDecl()->isDefaulted())
return;
CheckExplicitlyDefaultedSpecialMember(MD);
if (!MD->isInvalidDecl())
DefineImplicitSpecialMember(*this, MD, DefaultLoc);
} else {
Diag(DefaultLoc, diag::err_default_special_members);
}
}
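// A small sketch of the explicit-default checks above (the names are
// illustrative):
//   struct S {
//     S() = default;       // OK: a special member may be explicitly defaulted
//     void f() = default;  // error: only special member functions may be
//   };                     // defaulted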
static void SearchForReturnInStmt(Sema &Self, Stmt *S) {
for (Stmt *SubStmt : S->children()) {
if (!SubStmt)
continue;
if (isa<ReturnStmt>(SubStmt))
Self.Diag(SubStmt->getLocStart(),
diag::err_return_in_constructor_handler);
if (!isa<Expr>(SubStmt))
SearchForReturnInStmt(Self, SubStmt);
}
}
void Sema::DiagnoseReturnInConstructorExceptionHandler(CXXTryStmt *TryBlock) {
for (unsigned I = 0, E = TryBlock->getNumHandlers(); I != E; ++I) {
CXXCatchStmt *Handler = TryBlock->getHandler(I);
SearchForReturnInStmt(*this, Handler);
}
}
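// A minimal sketch of the pattern diagnosed above (the names are
// illustrative):
//   struct S {
//     S() try { } catch (...) { return; }  // error: return in the handler of
//   };                                     // a constructor function-try-block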
bool Sema::CheckOverridingFunctionAttributes(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
const FunctionType *NewFT = New->getType()->getAs<FunctionType>();
const FunctionType *OldFT = Old->getType()->getAs<FunctionType>();
CallingConv NewCC = NewFT->getCallConv(), OldCC = OldFT->getCallConv();
// If the calling conventions match, everything is fine
if (NewCC == OldCC)
return false;
// If the calling conventions mismatch because the new function is static,
// suppress the calling convention mismatch error; the error about static
// function override (err_static_overrides_virtual from
// Sema::CheckFunctionDeclaration) is more clear.
if (New->getStorageClass() == SC_Static)
return false;
Diag(New->getLocation(),
diag::err_conflicting_overriding_cc_attributes)
<< New->getDeclName() << New->getType() << Old->getType();
Diag(Old->getLocation(), diag::note_overridden_virtual_function);
return true;
}
bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
QualType NewTy = New->getType()->getAs<FunctionType>()->getReturnType();
QualType OldTy = Old->getType()->getAs<FunctionType>()->getReturnType();
if (Context.hasSameType(NewTy, OldTy) ||
NewTy->isDependentType() || OldTy->isDependentType())
return false;
// Check if the return types are covariant
QualType NewClassTy, OldClassTy;
// Both types must be pointers or references to classes.
if (const PointerType *NewPT = NewTy->getAs<PointerType>()) {
if (const PointerType *OldPT = OldTy->getAs<PointerType>()) {
NewClassTy = NewPT->getPointeeType();
OldClassTy = OldPT->getPointeeType();
}
} else if (const ReferenceType *NewRT = NewTy->getAs<ReferenceType>()) {
if (const ReferenceType *OldRT = OldTy->getAs<ReferenceType>()) {
if (NewRT->getTypeClass() == OldRT->getTypeClass()) {
NewClassTy = NewRT->getPointeeType();
OldClassTy = OldRT->getPointeeType();
}
}
}
// The return types are not both pointers or both references to a class type.
if (NewClassTy.isNull()) {
Diag(New->getLocation(),
diag::err_different_return_type_for_overriding_virtual_function)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
if (!Context.hasSameUnqualifiedType(NewClassTy, OldClassTy)) {
// C++14 [class.virtual]p8:
// If the class type in the covariant return type of D::f differs from
// that of B::f, the class type in the return type of D::f shall be
// complete at the point of declaration of D::f or shall be the class
// type D.
if (const RecordType *RT = NewClassTy->getAs<RecordType>()) {
if (!RT->isBeingDefined() &&
RequireCompleteType(New->getLocation(), NewClassTy,
diag::err_covariant_return_incomplete,
New->getDeclName()))
return true;
}
// Check if the new class derives from the old class.
if (!IsDerivedFrom(New->getLocation(), NewClassTy, OldClassTy)) {
Diag(New->getLocation(), diag::err_covariant_return_not_derived)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
// Check that the conversion from derived to base is valid.
if (CheckDerivedToBaseConversion(
NewClassTy, OldClassTy,
diag::err_covariant_return_inaccessible_base,
diag::err_covariant_return_ambiguous_derived_to_base_conv,
New->getLocation(), New->getReturnTypeSourceRange(),
New->getDeclName(), nullptr)) {
// FIXME: this note won't trigger for delayed access control
// diagnostics, and it's impossible to get an undelayed error
// here from access control during the original parse because
// the ParsingDeclSpec/ParsingDeclarator are still in scope.
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
}
// The qualifiers of the return types must be the same.
if (NewTy.getLocalCVRQualifiers() != OldTy.getLocalCVRQualifiers()) {
Diag(New->getLocation(),
diag::err_covariant_return_type_different_qualifications)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
// The new class type must have the same or fewer qualifiers than the old type.
if (NewClassTy.isMoreQualifiedThan(OldClassTy)) {
Diag(New->getLocation(),
diag::err_covariant_return_type_class_type_more_qualified)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
return false;
}
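// A compact sketch of the covariance rules checked above (the names are
// illustrative):
//   struct B { virtual B *clone(); };
//   struct D : B {
//     D *clone() override;  // OK: covariant, D derives from B
//   };
//   struct U { };
//   struct E : B {
//     U *clone() override;  // error: U is not derived from B
//   };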
/// \brief Mark the given method pure.
///
/// \param Method the method to be marked pure.
///
/// \param InitRange the source range that covers the "0" initializer.
bool Sema::CheckPureMethod(CXXMethodDecl *Method, SourceRange InitRange) {
SourceLocation EndLoc = InitRange.getEnd();
if (EndLoc.isValid())
Method->setRangeEnd(EndLoc);
if (Method->isVirtual() || Method->getParent()->isDependentContext()) {
Method->setPure();
return false;
}
if (!Method->isInvalidDecl())
Diag(Method->getLocation(), diag::err_non_virtual_pure)
<< Method->getDeclName() << InitRange;
return true;
}
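// A minimal sketch of the pure-specifier check above (the names are
// illustrative):
//   struct S {
//     virtual void f() = 0;  // OK: pure virtual member function
//     void g() = 0;          // error: 'g' is not virtual and cannot be pure
//   };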
void Sema::ActOnPureSpecifier(Decl *D, SourceLocation ZeroLoc) {
if (D->getFriendObjectKind())
Diag(D->getLocation(), diag::err_pure_friend);
else if (auto *M = dyn_cast<CXXMethodDecl>(D))
CheckPureMethod(M, ZeroLoc);
else
Diag(D->getLocation(), diag::err_illegal_initializer);
}
/// \brief Determine whether the given declaration is a static data member.
static bool isStaticDataMember(const Decl *D) {
if (const VarDecl *Var = dyn_cast_or_null<VarDecl>(D))
return Var->isStaticDataMember();
return false;
}
/// ActOnCXXEnterDeclInitializer - Invoked when we are about to parse
/// an initializer for the out-of-line declaration 'Dcl'. The scope
/// is a fresh scope pushed for just this purpose.
///
/// After this method is called, according to [C++ 3.4.1p13], if 'Dcl' is a
/// static data member of class X, names should be looked up in the scope of
/// class X.
void Sema::ActOnCXXEnterDeclInitializer(Scope *S, Decl *D) {
// If there is no declaration, there was an error parsing it.
if (!D || D->isInvalidDecl())
return;
// We will always have a nested name specifier here, but this declaration
// might not be out of line if the specifier names the current namespace:
// extern int n;
// int ::n = 0;
if (D->isOutOfLine())
EnterDeclaratorContext(S, D->getDeclContext());
// If we are parsing the initializer for a static data member, push a
// new expression evaluation context that is associated with this static
// data member.
if (isStaticDataMember(D))
PushExpressionEvaluationContext(
ExpressionEvaluationContext::PotentiallyEvaluated, D);
}
/// ActOnCXXExitDeclInitializer - Invoked after we are finished parsing an
/// initializer for the out-of-line declaration 'D'.
void Sema::ActOnCXXExitDeclInitializer(Scope *S, Decl *D) {
// If there is no declaration, there was an error parsing it.
if (!D || D->isInvalidDecl())
return;
if (isStaticDataMember(D))
PopExpressionEvaluationContext();
if (D->isOutOfLine())
ExitDeclaratorContext(S);
}
/// ActOnCXXConditionDeclarationExpr - Parsed a condition declaration of a
/// C++ if/switch/while/for statement.
/// e.g: "if (int x = f()) {...}"
DeclResult Sema::ActOnCXXConditionDeclaration(Scope *S, Declarator &D) {
// C++ 6.4p2:
// The declarator shall not specify a function or an array.
// The type-specifier-seq shall not contain typedef and shall not declare a
// new class or enumeration.
assert(D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
"Parser allowed 'typedef' as storage class of condition decl.");
Decl *Dcl = ActOnDeclarator(S, D);
if (!Dcl)
return true;
if (isa<FunctionDecl>(Dcl)) { // The declarator shall not specify a function.
Diag(Dcl->getLocation(), diag::err_invalid_use_of_function_type)
<< D.getSourceRange();
return true;
}
return Dcl;
}
void Sema::LoadExternalVTableUses() {
if (!ExternalSource)
return;
SmallVector<ExternalVTableUse, 4> VTables;
ExternalSource->ReadUsedVTables(VTables);
SmallVector<VTableUse, 4> NewUses;
for (unsigned I = 0, N = VTables.size(); I != N; ++I) {
llvm::DenseMap<CXXRecordDecl *, bool>::iterator Pos
= VTablesUsed.find(VTables[I].Record);
// Even if a definition wasn't required before, it may be required now.
if (Pos != VTablesUsed.end()) {
if (!Pos->second && VTables[I].DefinitionRequired)
Pos->second = true;
continue;
}
VTablesUsed[VTables[I].Record] = VTables[I].DefinitionRequired;
NewUses.push_back(VTableUse(VTables[I].Record, VTables[I].Location));
}
VTableUses.insert(VTableUses.begin(), NewUses.begin(), NewUses.end());
}
void Sema::MarkVTableUsed(SourceLocation Loc, CXXRecordDecl *Class,
bool DefinitionRequired) {
// Ignore any vtable uses in unevaluated operands or for classes that do
// not have a vtable.
if (!Class->isDynamicClass() || Class->isDependentContext() ||
CurContext->isDependentContext() || isUnevaluatedContext())
return;
// Try to insert this class into the map.
LoadExternalVTableUses();
Class = cast<CXXRecordDecl>(Class->getCanonicalDecl());
std::pair<llvm::DenseMap<CXXRecordDecl *, bool>::iterator, bool>
Pos = VTablesUsed.insert(std::make_pair(Class, DefinitionRequired));
if (!Pos.second) {
// If we already had an entry, check to see if we are promoting this vtable
// to require a definition. If so, we need to reappend to the VTableUses
// list, since we may have already processed the first entry.
if (DefinitionRequired && !Pos.first->second) {
Pos.first->second = true;
} else {
// Otherwise, we can early exit.
return;
}
} else {
// The Microsoft ABI requires that we perform the destructor body
// checks (i.e. operator delete() lookup) when the vtable is marked used, as
// the deleting destructor is emitted with the vtable, not with the
// destructor definition as in the Itanium ABI.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
CXXDestructorDecl *DD = Class->getDestructor();
if (DD && DD->isVirtual() && !DD->isDeleted()) {
if (Class->hasUserDeclaredDestructor() && !DD->isDefined()) {
// If this is an out-of-line declaration, marking it referenced will
// not do anything. Manually call CheckDestructor to look up operator
// delete().
ContextRAII SavedContext(*this, DD);
CheckDestructor(DD);
} else {
MarkFunctionReferenced(Loc, Class->getDestructor());
}
}
}
}
// Local classes need to have their virtual members marked
// immediately. For all other classes, we mark their virtual members
// at the end of the translation unit.
if (Class->isLocalClass())
MarkVirtualMembersReferenced(Loc, Class);
else
VTableUses.push_back(std::make_pair(Class, Loc));
}
bool Sema::DefineUsedVTables() {
LoadExternalVTableUses();
if (VTableUses.empty())
return false;
// Note: The VTableUses vector could grow as a result of marking
// the members of a class as "used", so we check the size each
// time through the loop and prefer indices (which are stable) to
// iterators (which are not).
bool DefinedAnything = false;
for (unsigned I = 0; I != VTableUses.size(); ++I) {
CXXRecordDecl *Class = VTableUses[I].first->getDefinition();
if (!Class)
continue;
TemplateSpecializationKind ClassTSK =
Class->getTemplateSpecializationKind();
SourceLocation Loc = VTableUses[I].second;
bool DefineVTable = true;
// If this class has a key function, but that key function is
// defined in another translation unit, we don't need to emit the
// vtable even though we're using it.
const CXXMethodDecl *KeyFunction = Context.getCurrentKeyFunction(Class);
if (KeyFunction && !KeyFunction->hasBody()) {
// The key function is in another translation unit.
DefineVTable = false;
TemplateSpecializationKind TSK =
KeyFunction->getTemplateSpecializationKind();
assert(TSK != TSK_ExplicitInstantiationDefinition &&
TSK != TSK_ImplicitInstantiation &&
"Instantiations don't have key functions");
(void)TSK;
} else if (!KeyFunction) {
// If we have a class with no key function that is the subject
// of an explicit instantiation declaration, suppress the
// vtable; it will live with the explicit instantiation
// definition.
bool IsExplicitInstantiationDeclaration =
ClassTSK == TSK_ExplicitInstantiationDeclaration;
for (auto R : Class->redecls()) {
TemplateSpecializationKind TSK
= cast<CXXRecordDecl>(R)->getTemplateSpecializationKind();
if (TSK == TSK_ExplicitInstantiationDeclaration)
IsExplicitInstantiationDeclaration = true;
else if (TSK == TSK_ExplicitInstantiationDefinition) {
IsExplicitInstantiationDeclaration = false;
break;
}
}
if (IsExplicitInstantiationDeclaration)
DefineVTable = false;
}
// The exception specifications for all virtual members may be needed even
// if we are not providing an authoritative form of the vtable in this TU.
// We may choose to emit it available_externally anyway.
if (!DefineVTable) {
MarkVirtualMemberExceptionSpecsNeeded(Loc, Class);
continue;
}
// Mark all of the virtual members of this class as referenced, so
// that we can build a vtable. Then, tell the AST consumer that a
// vtable for this class is required.
DefinedAnything = true;
MarkVirtualMembersReferenced(Loc, Class);
CXXRecordDecl *Canonical = cast<CXXRecordDecl>(Class->getCanonicalDecl());
if (VTablesUsed[Canonical])
Consumer.HandleVTable(Class);
// Warn if we're emitting a weak vtable. The vtable will be weak if there is
// no key function or the key function is inlined. Don't warn in C++ ABIs
// that lack key functions, since the user won't be able to make one.
if (Context.getTargetInfo().getCXXABI().hasKeyFunctions() &&
Class->isExternallyVisible() && ClassTSK != TSK_ImplicitInstantiation) {
const FunctionDecl *KeyFunctionDef = nullptr;
if (!KeyFunction || (KeyFunction->hasBody(KeyFunctionDef) &&
KeyFunctionDef->isInlined())) {
Diag(Class->getLocation(),
ClassTSK == TSK_ExplicitInstantiationDefinition
? diag::warn_weak_template_vtable
: diag::warn_weak_vtable)
<< Class;
}
}
}
VTableUses.clear();
return DefinedAnything;
}
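// A rough sketch of the key-function logic above (the names are
// illustrative):
//   struct NoKey { virtual void f() { } };  // every virtual is inline: the
//                                           // vtable is emitted weakly wherever
//                                           // it is used, and we warn about it
//   struct Key { virtual void f(); };       // the out-of-line definition of
//                                           // f() is the key function; the
//                                           // vtable lives in that TU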
void Sema::MarkVirtualMemberExceptionSpecsNeeded(SourceLocation Loc,
const CXXRecordDecl *RD) {
for (const auto *I : RD->methods())
if (I->isVirtual() && !I->isPure())
ResolveExceptionSpec(Loc, I->getType()->castAs<FunctionProtoType>());
}
void Sema::MarkVirtualMembersReferenced(SourceLocation Loc,
const CXXRecordDecl *RD) {
// Mark all functions which will appear in RD's vtable as used.
CXXFinalOverriderMap FinalOverriders;
RD->getFinalOverriders(FinalOverriders);
for (CXXFinalOverriderMap::const_iterator I = FinalOverriders.begin(),
E = FinalOverriders.end();
I != E; ++I) {
for (OverridingMethods::const_iterator OI = I->second.begin(),
OE = I->second.end();
OI != OE; ++OI) {
assert(OI->second.size() > 0 && "no final overrider");
CXXMethodDecl *Overrider = OI->second.front().Method;
// C++ [basic.def.odr]p2:
// [...] A virtual member function is used if it is not pure. [...]
if (!Overrider->isPure())
MarkFunctionReferenced(Loc, Overrider);
}
}
// Only classes that have virtual bases need a VTT.
if (RD->getNumVBases() == 0)
return;
for (const auto &I : RD->bases()) {
const CXXRecordDecl *Base =
cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
if (Base->getNumVBases() == 0)
continue;
MarkVirtualMembersReferenced(Loc, Base);
}
}
/// SetIvarInitializers - This routine builds initialization ASTs for the
/// Objective-C implementation whose ivars need to be initialized.
void Sema::SetIvarInitializers(ObjCImplementationDecl *ObjCImplementation) {
if (!getLangOpts().CPlusPlus)
return;
if (ObjCInterfaceDecl *OID = ObjCImplementation->getClassInterface()) {
SmallVector<ObjCIvarDecl*, 8> ivars;
CollectIvarsToConstructOrDestruct(OID, ivars);
if (ivars.empty())
return;
SmallVector<CXXCtorInitializer*, 32> AllToInit;
for (unsigned i = 0; i < ivars.size(); i++) {
FieldDecl *Field = ivars[i];
if (Field->isInvalidDecl())
continue;
CXXCtorInitializer *Member;
InitializedEntity InitEntity = InitializedEntity::InitializeMember(Field);
InitializationKind InitKind =
InitializationKind::CreateDefault(ObjCImplementation->getLocation());
InitializationSequence InitSeq(*this, InitEntity, InitKind, None);
ExprResult MemberInit =
InitSeq.Perform(*this, InitEntity, InitKind, None);
MemberInit = MaybeCreateExprWithCleanups(MemberInit);
// Note that MemberInit could actually come back empty if no initialization
// is required (e.g., because it would call a trivial default constructor).
if (!MemberInit.get() || MemberInit.isInvalid())
continue;
Member =
new (Context) CXXCtorInitializer(Context, Field, SourceLocation(),
SourceLocation(),
MemberInit.getAs<Expr>(),
SourceLocation());
AllToInit.push_back(Member);
// Be sure that the destructor is accessible and is marked as referenced.
if (const RecordType *RecordTy =
Context.getBaseElementType(Field->getType())
->getAs<RecordType>()) {
CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (CXXDestructorDecl *Destructor = LookupDestructor(RD)) {
MarkFunctionReferenced(Field->getLocation(), Destructor);
CheckDestructorAccess(Field->getLocation(), Destructor,
PDiag(diag::err_access_dtor_ivar)
<< Context.getBaseElementType(Field->getType()));
}
}
}
ObjCImplementation->setIvarInitializers(Context,
AllToInit.data(), AllToInit.size());
}
}
static
void DelegatingCycleHelper(CXXConstructorDecl* Ctor,
llvm::SmallSet<CXXConstructorDecl*, 4> &Valid,
llvm::SmallSet<CXXConstructorDecl*, 4> &Invalid,
llvm::SmallSet<CXXConstructorDecl*, 4> &Current,
Sema &S) {
if (Ctor->isInvalidDecl())
return;
CXXConstructorDecl *Target = Ctor->getTargetConstructor();
// Target may not be determinable yet, for instance if this is a dependent
// call in an uninstantiated template.
if (Target) {
const FunctionDecl *FNTarget = nullptr;
(void)Target->hasBody(FNTarget);
Target = const_cast<CXXConstructorDecl*>(
cast_or_null<CXXConstructorDecl>(FNTarget));
}
CXXConstructorDecl *Canonical = Ctor->getCanonicalDecl(),
// Avoid dereferencing a null pointer here.
*TCanonical = Target? Target->getCanonicalDecl() : nullptr;
if (!Current.insert(Canonical).second)
return;
// We know that beyond here, we aren't chaining into a cycle.
if (!Target || !Target->isDelegatingConstructor() ||
Target->isInvalidDecl() || Valid.count(TCanonical)) {
Valid.insert(Current.begin(), Current.end());
Current.clear();
// We've hit a cycle.
} else if (TCanonical == Canonical || Invalid.count(TCanonical) ||
Current.count(TCanonical)) {
// If we haven't diagnosed this cycle yet, do so now.
if (!Invalid.count(TCanonical)) {
S.Diag((*Ctor->init_begin())->getSourceLocation(),
diag::warn_delegating_ctor_cycle)
<< Ctor;
// Don't add a note for a function delegating directly to itself.
if (TCanonical != Canonical)
S.Diag(Target->getLocation(), diag::note_it_delegates_to);
CXXConstructorDecl *C = Target;
while (C->getCanonicalDecl() != Canonical) {
const FunctionDecl *FNTarget = nullptr;
(void)C->getTargetConstructor()->hasBody(FNTarget);
assert(FNTarget && "Ctor cycle through bodiless function");
C = const_cast<CXXConstructorDecl*>(
cast<CXXConstructorDecl>(FNTarget));
S.Diag(C->getLocation(), diag::note_which_delegates_to);
}
}
Invalid.insert(Current.begin(), Current.end());
Current.clear();
} else {
DelegatingCycleHelper(Target, Valid, Invalid, Current, S);
}
}
void Sema::CheckDelegatingCtorCycles() {
llvm::SmallSet<CXXConstructorDecl*, 4> Valid, Invalid, Current;
for (DelegatingCtorDeclsType::iterator
I = DelegatingCtorDecls.begin(ExternalSource),
E = DelegatingCtorDecls.end();
I != E; ++I)
DelegatingCycleHelper(*I, Valid, Invalid, Current, *this);
for (llvm::SmallSet<CXXConstructorDecl *, 4>::iterator CI = Invalid.begin(),
CE = Invalid.end();
CI != CE; ++CI)
(*CI)->setInvalidDecl();
}
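// A minimal sketch of a diagnosed delegation cycle (the names are
// illustrative):
//   struct S {
//     S() : S(0) { }    // delegates to S(int)...
//     S(int) : S() { }  // ...which delegates back: the cycle is warned about
//   };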
namespace {
/// \brief AST visitor that finds references to the 'this' expression.
class FindCXXThisExpr : public RecursiveASTVisitor<FindCXXThisExpr> {
Sema &S;
public:
explicit FindCXXThisExpr(Sema &S) : S(S) { }
bool VisitCXXThisExpr(CXXThisExpr *E) {
S.Diag(E->getLocation(), diag::err_this_static_member_func)
<< E->isImplicit();
return false;
}
};
}
bool Sema::checkThisInStaticMemberFunctionType(CXXMethodDecl *Method) {
TypeSourceInfo *TSInfo = Method->getTypeSourceInfo();
if (!TSInfo)
return false;
TypeLoc TL = TSInfo->getTypeLoc();
FunctionProtoTypeLoc ProtoTL = TL.getAs<FunctionProtoTypeLoc>();
if (!ProtoTL)
return false;
// C++11 [expr.prim.general]p3:
// [The expression this] shall not appear before the optional
// cv-qualifier-seq and it shall not appear within the declaration of a
// static member function (although its type and value category are defined
// within a static member function as they are within a non-static member
// function). [ Note: this is because declaration matching does not occur
// until the complete declarator is known. - end note ]
const FunctionProtoType *Proto = ProtoTL.getTypePtr();
FindCXXThisExpr Finder(*this);
// If the return type came after the cv-qualifier-seq, check it now.
if (Proto->hasTrailingReturn() &&
!Finder.TraverseTypeLoc(ProtoTL.getReturnLoc()))
return true;
// Check the exception specification.
if (checkThisInStaticMemberFunctionExceptionSpec(Method))
return true;
return checkThisInStaticMemberFunctionAttributes(Method);
}
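// A small sketch of the rejected use of 'this' (the names are illustrative):
//   struct S {
//     static auto f() -> decltype(this);  // error: 'this' cannot appear in
//   };                                    // the declaration of a static
//                                         // member function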
bool Sema::checkThisInStaticMemberFunctionExceptionSpec(CXXMethodDecl *Method) {
TypeSourceInfo *TSInfo = Method->getTypeSourceInfo();
if (!TSInfo)
return false;
TypeLoc TL = TSInfo->getTypeLoc();
FunctionProtoTypeLoc ProtoTL = TL.getAs<FunctionProtoTypeLoc>();
if (!ProtoTL)
return false;
const FunctionProtoType *Proto = ProtoTL.getTypePtr();
FindCXXThisExpr Finder(*this);
switch (Proto->getExceptionSpecType()) {
case EST_Unparsed:
case EST_Uninstantiated:
case EST_Unevaluated:
case EST_BasicNoexcept:
case EST_DynamicNone:
case EST_MSAny:
case EST_None:
break;
case EST_ComputedNoexcept:
if (!Finder.TraverseStmt(Proto->getNoexceptExpr()))
return true;
LLVM_FALLTHROUGH;
case EST_Dynamic:
for (const auto &E : Proto->exceptions()) {
if (!Finder.TraverseType(E))
return true;
}
break;
}
return false;
}
bool Sema::checkThisInStaticMemberFunctionAttributes(CXXMethodDecl *Method) {
FindCXXThisExpr Finder(*this);
// Check attributes.
for (const auto *A : Method->attrs()) {
// FIXME: This should be emitted by tblgen.
Expr *Arg = nullptr;
ArrayRef<Expr *> Args;
if (const auto *G = dyn_cast<GuardedByAttr>(A))
Arg = G->getArg();
else if (const auto *G = dyn_cast<PtGuardedByAttr>(A))
Arg = G->getArg();
else if (const auto *AA = dyn_cast<AcquiredAfterAttr>(A))
Args = llvm::makeArrayRef(AA->args_begin(), AA->args_size());
else if (const auto *AB = dyn_cast<AcquiredBeforeAttr>(A))
Args = llvm::makeArrayRef(AB->args_begin(), AB->args_size());
else if (const auto *ETLF = dyn_cast<ExclusiveTrylockFunctionAttr>(A)) {
Arg = ETLF->getSuccessValue();
Args = llvm::makeArrayRef(ETLF->args_begin(), ETLF->args_size());
} else if (const auto *STLF = dyn_cast<SharedTrylockFunctionAttr>(A)) {
Arg = STLF->getSuccessValue();
Args = llvm::makeArrayRef(STLF->args_begin(), STLF->args_size());
} else if (const auto *LR = dyn_cast<LockReturnedAttr>(A))
Arg = LR->getArg();
else if (const auto *LE = dyn_cast<LocksExcludedAttr>(A))
Args = llvm::makeArrayRef(LE->args_begin(), LE->args_size());
else if (const auto *RC = dyn_cast<RequiresCapabilityAttr>(A))
Args = llvm::makeArrayRef(RC->args_begin(), RC->args_size());
else if (const auto *AC = dyn_cast<AcquireCapabilityAttr>(A))
Args = llvm::makeArrayRef(AC->args_begin(), AC->args_size());
else if (const auto *AC = dyn_cast<TryAcquireCapabilityAttr>(A))
Args = llvm::makeArrayRef(AC->args_begin(), AC->args_size());
else if (const auto *RC = dyn_cast<ReleaseCapabilityAttr>(A))
Args = llvm::makeArrayRef(RC->args_begin(), RC->args_size());
if (Arg && !Finder.TraverseStmt(Arg))
return true;
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (!Finder.TraverseStmt(Args[I]))
return true;
}
}
return false;
}
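// Illustrative (hypothetical) exception specifications normalized below:
//   void f() throw(int, float);           // EST_Dynamic
//   void g() noexcept(sizeof(int) == 4);  // EST_ComputedNoexcept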
void Sema::checkExceptionSpecification(
bool IsTopLevel, ExceptionSpecificationType EST,
ArrayRef<ParsedType> DynamicExceptions,
ArrayRef<SourceRange> DynamicExceptionRanges, Expr *NoexceptExpr,
SmallVectorImpl<QualType> &Exceptions,
FunctionProtoType::ExceptionSpecInfo &ESI) {
Exceptions.clear();
ESI.Type = EST;
if (EST == EST_Dynamic) {
Exceptions.reserve(DynamicExceptions.size());
for (unsigned ei = 0, ee = DynamicExceptions.size(); ei != ee; ++ei) {
// FIXME: Preserve type source info.
QualType ET = GetTypeFromParser(DynamicExceptions[ei]);
if (IsTopLevel) {
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
collectUnexpandedParameterPacks(ET, Unexpanded);
if (!Unexpanded.empty()) {
DiagnoseUnexpandedParameterPacks(
DynamicExceptionRanges[ei].getBegin(), UPPC_ExceptionType,
Unexpanded);
continue;
}
}
// Check that the type is valid for an exception spec, and
// drop it if not.
if (!CheckSpecifiedExceptionType(ET, DynamicExceptionRanges[ei]))
Exceptions.push_back(ET);
}
ESI.Exceptions = Exceptions;
return;
}
if (EST == EST_ComputedNoexcept) {
// If an error occurred, there's no expression here.
if (NoexceptExpr) {
assert((NoexceptExpr->isTypeDependent() ||
NoexceptExpr->getType()->getCanonicalTypeUnqualified() ==
Context.BoolTy) &&
"Parser should have made sure that the expression is boolean");
if (IsTopLevel && NoexceptExpr &&
DiagnoseUnexpandedParameterPack(NoexceptExpr)) {
ESI.Type = EST_BasicNoexcept;
return;
}
if (!NoexceptExpr->isValueDependent())
NoexceptExpr = VerifyIntegerConstantExpression(NoexceptExpr, nullptr,
diag::err_noexcept_needs_constant_expression,
/*AllowFold*/ false).get();
ESI.NoexceptExpr = NoexceptExpr;
}
return;
}
}
void Sema::actOnDelayedExceptionSpecification(Decl *MethodD,
ExceptionSpecificationType EST,
SourceRange SpecificationRange,
ArrayRef<ParsedType> DynamicExceptions,
ArrayRef<SourceRange> DynamicExceptionRanges,
Expr *NoexceptExpr) {
if (!MethodD)
return;
// Dig out the method we're referring to.
if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(MethodD))
MethodD = FunTmpl->getTemplatedDecl();
CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(MethodD);
if (!Method)
return;
// Check the exception specification.
llvm::SmallVector<QualType, 4> Exceptions;
FunctionProtoType::ExceptionSpecInfo ESI;
checkExceptionSpecification(/*IsTopLevel*/true, EST, DynamicExceptions,
DynamicExceptionRanges, NoexceptExpr, Exceptions,
ESI);
// Update the exception specification on the function type.
Context.adjustExceptionSpec(Method, ESI, /*AsWritten*/true);
if (Method->isStatic())
checkThisInStaticMemberFunctionExceptionSpec(Method);
if (Method->isVirtual()) {
// Check overrides, which we previously had to delay.
for (CXXMethodDecl::method_iterator O = Method->begin_overridden_methods(),
OEnd = Method->end_overridden_methods();
O != OEnd; ++O)
CheckOverridingFunctionExceptionSpec(Method, *O);
}
}
/// HandleMSProperty - Analyze a __declspec(property) field of a C++ class.
///
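// A typical use of the extension (hypothetical names):
//   struct S {
//     __declspec(property(get = GetX, put = PutX)) int x;
//     int GetX();
//     void PutX(int v);
//   };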
MSPropertyDecl *Sema::HandleMSProperty(Scope *S, RecordDecl *Record,
SourceLocation DeclStart,
Declarator &D, Expr *BitWidth,
InClassInitStyle InitStyle,
AccessSpecifier AS,
AttributeList *MSPropertyAttr) {
IdentifierInfo *II = D.getIdentifier();
if (!II) {
Diag(DeclStart, diag::err_anonymous_property);
return nullptr;
}
SourceLocation Loc = D.getIdentifierLoc();
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (getLangOpts().CPlusPlus) {
CheckExtraCXXDefaultArguments(D);
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DataMemberType)) {
D.setInvalidType();
T = Context.IntTy;
TInfo = Context.getTrivialTypeSourceInfo(T, Loc);
}
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
if (D.getDeclSpec().isInlineSpecified())
Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus1z;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
// Check to see if this name was declared as a member previously
NamedDecl *PrevDecl = nullptr;
LookupResult Previous(*this, II, Loc, LookupMemberName, ForRedeclaration);
LookupName(Previous, S);
switch (Previous.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundUnresolvedValue:
PrevDecl = Previous.getAsSingle<NamedDecl>();
break;
case LookupResult::FoundOverloaded:
PrevDecl = Previous.getRepresentativeDecl();
break;
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
break;
}
if (PrevDecl && PrevDecl->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
// Just pretend that we didn't see the previous declaration.
PrevDecl = nullptr;
}
if (PrevDecl && !isDeclInScope(PrevDecl, Record, S))
PrevDecl = nullptr;
SourceLocation TSSL = D.getLocStart();
const AttributeList::PropertyData &Data = MSPropertyAttr->getPropertyData();
MSPropertyDecl *NewPD = MSPropertyDecl::Create(
Context, Record, Loc, II, T, TInfo, TSSL, Data.GetterId, Data.SetterId);
ProcessDeclAttributes(TUScope, NewPD, D);
NewPD->setAccess(AS);
if (NewPD->isInvalidDecl())
Record->setInvalidDecl();
if (D.getDeclSpec().isModulePrivateSpecified())
NewPD->setModulePrivate();
if (NewPD->isInvalidDecl() && PrevDecl) {
// Don't introduce NewFD into scope; there's already something
// with the same name in the same scope.
} else if (II) {
PushOnScopeChains(NewPD, S);
} else
Record->addDecl(NewPD);
return NewPD;
}
Index: head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp (revision 322855)
@@ -1,2681 +1,2681 @@
//===--- SemaObjCProperty.cpp - Semantic Analysis for ObjC @property ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for Objective C @property and
// @synthesize declarations.
//
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaInternal.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Initialization.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// Grammar actions.
//===----------------------------------------------------------------------===//
/// getImpliedARCOwnership - Given a set of property attributes and a
/// type, infer an expected lifetime. The type's ownership qualification
/// is not considered.
///
/// Returns OCL_None if the attributes as stated do not imply an ownership.
/// Never returns OCL_Autoreleasing.
static Qualifiers::ObjCLifetime getImpliedARCOwnership(
ObjCPropertyDecl::PropertyAttributeKind attrs,
QualType type) {
// retain, strong, copy, weak, and unsafe_unretained are only legal
// on properties of retainable pointer type.
if (attrs & (ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong |
ObjCPropertyDecl::OBJC_PR_copy)) {
return Qualifiers::OCL_Strong;
} else if (attrs & ObjCPropertyDecl::OBJC_PR_weak) {
return Qualifiers::OCL_Weak;
} else if (attrs & ObjCPropertyDecl::OBJC_PR_unsafe_unretained) {
return Qualifiers::OCL_ExplicitNone;
}
// assign can appear on other types, so we have to check the
// property type.
if (attrs & ObjCPropertyDecl::OBJC_PR_assign &&
type->isObjCRetainableType()) {
return Qualifiers::OCL_ExplicitNone;
}
return Qualifiers::OCL_None;
}
/// Check the internal consistency of a property declaration with
/// an explicit ownership qualifier.
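// e.g. (hypothetical) the attribute and the lifetime qualifier below
// disagree, so the property is diagnosed:
//   @property (strong) __weak id obj;   // error: inconsistent ownership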
static void checkPropertyDeclWithOwnership(Sema &S,
ObjCPropertyDecl *property) {
if (property->isInvalidDecl()) return;
ObjCPropertyDecl::PropertyAttributeKind propertyKind
= property->getPropertyAttributes();
Qualifiers::ObjCLifetime propertyLifetime
= property->getType().getObjCLifetime();
assert(propertyLifetime != Qualifiers::OCL_None);
Qualifiers::ObjCLifetime expectedLifetime
= getImpliedARCOwnership(propertyKind, property->getType());
if (!expectedLifetime) {
// We have a lifetime qualifier but no dominating property
// attribute. That's okay, but restore reasonable invariants by
// setting the property attribute according to the lifetime
// qualifier.
ObjCPropertyDecl::PropertyAttributeKind attr;
if (propertyLifetime == Qualifiers::OCL_Strong) {
attr = ObjCPropertyDecl::OBJC_PR_strong;
} else if (propertyLifetime == Qualifiers::OCL_Weak) {
attr = ObjCPropertyDecl::OBJC_PR_weak;
} else {
assert(propertyLifetime == Qualifiers::OCL_ExplicitNone);
attr = ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
}
property->setPropertyAttributes(attr);
return;
}
if (propertyLifetime == expectedLifetime) return;
property->setInvalidDecl();
S.Diag(property->getLocation(),
diag::err_arc_inconsistent_property_ownership)
<< property->getDeclName()
<< expectedLifetime
<< propertyLifetime;
}
/// \brief Check this Objective-C property against a property declared in the
/// given protocol.
static void
CheckPropertyAgainstProtocol(Sema &S, ObjCPropertyDecl *Prop,
ObjCProtocolDecl *Proto,
llvm::SmallPtrSetImpl<ObjCProtocolDecl *> &Known) {
// Have we seen this protocol before?
if (!Known.insert(Proto).second)
return;
// Look for a property with the same name.
DeclContext::lookup_result R = Proto->lookup(Prop->getDeclName());
for (unsigned I = 0, N = R.size(); I != N; ++I) {
if (ObjCPropertyDecl *ProtoProp = dyn_cast<ObjCPropertyDecl>(R[I])) {
S.DiagnosePropertyMismatch(Prop, ProtoProp, Proto->getIdentifier(), true);
return;
}
}
// Check this property against any protocols we inherit.
for (auto *P : Proto->protocols())
CheckPropertyAgainstProtocol(S, Prop, P, Known);
}
static unsigned deducePropertyOwnershipFromType(Sema &S, QualType T) {
// In GC mode, just look for the __weak qualifier.
if (S.getLangOpts().getGC() != LangOptions::NonGC) {
if (T.isObjCGCWeak()) return ObjCDeclSpec::DQ_PR_weak;
// In ARC/MRC, look for an explicit ownership qualifier.
// For some reason, this only applies to __weak.
} else if (auto ownership = T.getObjCLifetime()) {
switch (ownership) {
case Qualifiers::OCL_Weak:
return ObjCDeclSpec::DQ_PR_weak;
case Qualifiers::OCL_Strong:
return ObjCDeclSpec::DQ_PR_strong;
case Qualifiers::OCL_ExplicitNone:
return ObjCDeclSpec::DQ_PR_unsafe_unretained;
case Qualifiers::OCL_Autoreleasing:
case Qualifiers::OCL_None:
return 0;
}
llvm_unreachable("bad qualifier");
}
return 0;
}
static const unsigned OwnershipMask =
(ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_copy |
ObjCPropertyDecl::OBJC_PR_weak |
ObjCPropertyDecl::OBJC_PR_strong |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
static unsigned getOwnershipRule(unsigned attr) {
unsigned result = attr & OwnershipMask;
// From an ownership perspective, assign and unsafe_unretained are
// identical; make sure one also implies the other.
if (result & (ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained)) {
result |= ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
}
return result;
}
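// A typical declaration handled here (illustrative):
//   @interface Foo
//   @property (nonatomic, copy) NSString *name;
//   @end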
Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
ObjCDeclSpec &ODS,
Selector GetterSel,
Selector SetterSel,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC) {
unsigned Attributes = ODS.getPropertyAttributes();
FD.D.setObjCWeakProperty((Attributes & ObjCDeclSpec::DQ_PR_weak) != 0);
TypeSourceInfo *TSI = GetTypeForDeclarator(FD.D, S);
QualType T = TSI->getType();
if (!getOwnershipRule(Attributes)) {
Attributes |= deducePropertyOwnershipFromType(*this, T);
}
bool isReadWrite = ((Attributes & ObjCDeclSpec::DQ_PR_readwrite) ||
// default is readwrite!
!(Attributes & ObjCDeclSpec::DQ_PR_readonly));
// Proceed with constructing the ObjCPropertyDecls.
ObjCContainerDecl *ClassDecl = cast<ObjCContainerDecl>(CurContext);
ObjCPropertyDecl *Res = nullptr;
if (ObjCCategoryDecl *CDecl = dyn_cast<ObjCCategoryDecl>(ClassDecl)) {
if (CDecl->IsClassExtension()) {
Res = HandlePropertyInClassExtension(S, AtLoc, LParenLoc,
FD,
GetterSel, ODS.getGetterNameLoc(),
SetterSel, ODS.getSetterNameLoc(),
isReadWrite, Attributes,
ODS.getPropertyAttributes(),
T, TSI, MethodImplKind);
if (!Res)
return nullptr;
}
}
if (!Res) {
Res = CreatePropertyDecl(S, ClassDecl, AtLoc, LParenLoc, FD,
GetterSel, ODS.getGetterNameLoc(), SetterSel,
ODS.getSetterNameLoc(), isReadWrite, Attributes,
ODS.getPropertyAttributes(), T, TSI,
MethodImplKind);
if (lexicalDC)
Res->setLexicalDeclContext(lexicalDC);
}
// Validate the attributes on the @property.
CheckObjCPropertyAttributes(Res, AtLoc, Attributes,
(isa<ObjCInterfaceDecl>(ClassDecl) ||
isa<ObjCProtocolDecl>(ClassDecl)));
// Check consistency if the type has explicit ownership qualification.
if (Res->getType().getObjCLifetime())
checkPropertyDeclWithOwnership(*this, Res);
llvm::SmallPtrSet<ObjCProtocolDecl *, 16> KnownProtos;
if (ObjCInterfaceDecl *IFace = dyn_cast<ObjCInterfaceDecl>(ClassDecl)) {
// For a class, compare the property against a property in our superclass.
bool FoundInSuper = false;
ObjCInterfaceDecl *CurrentInterfaceDecl = IFace;
while (ObjCInterfaceDecl *Super = CurrentInterfaceDecl->getSuperClass()) {
DeclContext::lookup_result R = Super->lookup(Res->getDeclName());
for (unsigned I = 0, N = R.size(); I != N; ++I) {
if (ObjCPropertyDecl *SuperProp = dyn_cast<ObjCPropertyDecl>(R[I])) {
DiagnosePropertyMismatch(Res, SuperProp, Super->getIdentifier(), false);
FoundInSuper = true;
break;
}
}
if (FoundInSuper)
break;
else
CurrentInterfaceDecl = Super;
}
if (FoundInSuper) {
// Also compare the property against a property in our protocols.
for (auto *P : CurrentInterfaceDecl->protocols()) {
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
} else {
// Slower path: look in all protocols we referenced.
for (auto *P : IFace->all_referenced_protocols()) {
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
}
} else if (ObjCCategoryDecl *Cat = dyn_cast<ObjCCategoryDecl>(ClassDecl)) {
// We don't check class extensions, because properties in a class extension
// are meant to override some of the attributes, and that checking has
// already been done when the property in the class extension was constructed.
if (!Cat->IsClassExtension())
for (auto *P : Cat->protocols())
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
} else {
ObjCProtocolDecl *Proto = cast<ObjCProtocolDecl>(ClassDecl);
for (auto *P : Proto->protocols())
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
ActOnDocumentableDecl(Res);
return Res;
}
static ObjCPropertyDecl::PropertyAttributeKind
makePropertyAttributesAsWritten(unsigned Attributes) {
unsigned attributesAsWritten = 0;
if (Attributes & ObjCDeclSpec::DQ_PR_readonly)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_readonly;
if (Attributes & ObjCDeclSpec::DQ_PR_readwrite)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_readwrite;
if (Attributes & ObjCDeclSpec::DQ_PR_getter)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_getter;
if (Attributes & ObjCDeclSpec::DQ_PR_setter)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_setter;
if (Attributes & ObjCDeclSpec::DQ_PR_assign)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_assign;
if (Attributes & ObjCDeclSpec::DQ_PR_retain)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_retain;
if (Attributes & ObjCDeclSpec::DQ_PR_strong)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_strong;
if (Attributes & ObjCDeclSpec::DQ_PR_weak)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_weak;
if (Attributes & ObjCDeclSpec::DQ_PR_copy)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_copy;
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
if (Attributes & ObjCDeclSpec::DQ_PR_nonatomic)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_nonatomic;
if (Attributes & ObjCDeclSpec::DQ_PR_atomic)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_atomic;
if (Attributes & ObjCDeclSpec::DQ_PR_class)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_class;
return (ObjCPropertyDecl::PropertyAttributeKind)attributesAsWritten;
}
static bool LocPropertyAttribute( ASTContext &Context, const char *attrName,
SourceLocation LParenLoc, SourceLocation &Loc) {
if (LParenLoc.isMacroID())
return false;
SourceManager &SM = Context.getSourceManager();
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(LParenLoc);
// Try to load the file buffer.
bool invalidTemp = false;
StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
if (invalidTemp)
return false;
const char *tokenBegin = file.data() + locInfo.second;
// Lex from the start of the given location.
Lexer lexer(SM.getLocForStartOfFile(locInfo.first),
Context.getLangOpts(),
file.begin(), tokenBegin, file.end());
Token Tok;
do {
lexer.LexFromRawLexer(Tok);
if (Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == attrName) {
Loc = Tok.getLocation();
return true;
}
} while (Tok.isNot(tok::r_paren));
return false;
}
/// Check for a mismatch in the atomicity of the given properties.
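// e.g. (hypothetical) the class-extension redeclaration below explicitly
// contradicts the primary declaration's atomicity and is diagnosed:
//   @property (atomic, readwrite) id x;      // primary class
//   @property (nonatomic, readwrite) id x;   // class extension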
static void checkAtomicPropertyMismatch(Sema &S,
ObjCPropertyDecl *OldProperty,
ObjCPropertyDecl *NewProperty,
bool PropagateAtomicity) {
// If the atomicity of both matches, we're done.
bool OldIsAtomic =
(OldProperty->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_nonatomic)
== 0;
bool NewIsAtomic =
(NewProperty->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_nonatomic)
== 0;
if (OldIsAtomic == NewIsAtomic) return;
// Determine whether the given property is readonly and implicitly
// atomic.
auto isImplicitlyReadonlyAtomic = [](ObjCPropertyDecl *Property) -> bool {
// Is it readonly?
auto Attrs = Property->getPropertyAttributes();
if ((Attrs & ObjCPropertyDecl::OBJC_PR_readonly) == 0) return false;
// Is it nonatomic?
if (Attrs & ObjCPropertyDecl::OBJC_PR_nonatomic) return false;
// Was 'atomic' specified directly?
if (Property->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_atomic)
return false;
return true;
};
// If we're allowed to propagate atomicity, and the new property did
// not specify atomicity at all, propagate.
const unsigned AtomicityMask =
(ObjCPropertyDecl::OBJC_PR_atomic | ObjCPropertyDecl::OBJC_PR_nonatomic);
if (PropagateAtomicity &&
((NewProperty->getPropertyAttributesAsWritten() & AtomicityMask) == 0)) {
unsigned Attrs = NewProperty->getPropertyAttributes();
Attrs = Attrs & ~AtomicityMask;
if (OldIsAtomic)
Attrs |= ObjCPropertyDecl::OBJC_PR_atomic;
else
Attrs |= ObjCPropertyDecl::OBJC_PR_nonatomic;
NewProperty->overwritePropertyAttributes(Attrs);
return;
}
// One of the properties is atomic; if it's a readonly property, and
// 'atomic' wasn't explicitly specified, we're okay.
if ((OldIsAtomic && isImplicitlyReadonlyAtomic(OldProperty)) ||
(NewIsAtomic && isImplicitlyReadonlyAtomic(NewProperty)))
return;
// Diagnose the conflict.
const IdentifierInfo *OldContextName;
auto *OldDC = OldProperty->getDeclContext();
if (auto Category = dyn_cast<ObjCCategoryDecl>(OldDC))
OldContextName = Category->getClassInterface()->getIdentifier();
else
OldContextName = cast<ObjCContainerDecl>(OldDC)->getIdentifier();
S.Diag(NewProperty->getLocation(), diag::warn_property_attribute)
<< NewProperty->getDeclName() << "atomic"
<< OldContextName;
S.Diag(OldProperty->getLocation(), diag::note_property_declare);
}
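// A class extension may refine a 'readonly' property to 'readwrite'
// (illustrative sketch):
//   @interface I
//   @property (readonly) id x;
//   @end
//   @interface I ()                  // class extension
//   @property (readwrite) id x;      // OK: refines the primary declaration
//   @end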
ObjCPropertyDecl *
Sema::HandlePropertyInClassExtension(Scope *S,
SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
Selector GetterSel,
SourceLocation GetterNameLoc,
Selector SetterSel,
SourceLocation SetterNameLoc,
const bool isReadWrite,
unsigned &Attributes,
const unsigned AttributesAsWritten,
QualType T,
TypeSourceInfo *TSI,
tok::ObjCKeywordKind MethodImplKind) {
ObjCCategoryDecl *CDecl = cast<ObjCCategoryDecl>(CurContext);
// Diagnose if this property is already in the continuation class.
DeclContext *DC = CurContext;
IdentifierInfo *PropertyId = FD.D.getIdentifier();
ObjCInterfaceDecl *CCPrimary = CDecl->getClassInterface();
// We need to look in the @interface to see if the @property was
// already declared.
if (!CCPrimary) {
Diag(CDecl->getLocation(), diag::err_continuation_class);
return nullptr;
}
bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
(Attributes & ObjCDeclSpec::DQ_PR_class);
// Find the property in the extended class's primary class or
// extensions.
ObjCPropertyDecl *PIDecl = CCPrimary->FindPropertyVisibleInPrimaryClass(
PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty));
// If we found a property in an extension, complain.
if (PIDecl && isa<ObjCCategoryDecl>(PIDecl->getDeclContext())) {
Diag(AtLoc, diag::err_duplicate_property);
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
// Check for consistency with the previous declaration, if there is one.
if (PIDecl) {
// A readonly property declared in the primary class can be refined
// by adding a readwrite property within an extension.
// Anything else is an error.
if (!(PIDecl->isReadOnly() && isReadWrite)) {
// Tailor the diagnostics for the common case where a readwrite
// property is declared both in the @interface and the continuation.
// This is a common error where the user often intended the original
// declaration to be readonly.
unsigned diag =
(Attributes & ObjCDeclSpec::DQ_PR_readwrite) &&
(PIDecl->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_readwrite)
? diag::err_use_continuation_class_redeclaration_readwrite
: diag::err_use_continuation_class;
Diag(AtLoc, diag)
<< CCPrimary->getDeclName();
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
// Check for consistency of getters.
if (PIDecl->getGetterName() != GetterSel) {
// If the getter was written explicitly, complain.
if (AttributesAsWritten & ObjCDeclSpec::DQ_PR_getter) {
Diag(AtLoc, diag::warn_property_redecl_getter_mismatch)
<< PIDecl->getGetterName() << GetterSel;
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
// Always adopt the getter from the original declaration.
GetterSel = PIDecl->getGetterName();
Attributes |= ObjCDeclSpec::DQ_PR_getter;
}
// Check consistency of ownership.
unsigned ExistingOwnership
= getOwnershipRule(PIDecl->getPropertyAttributes());
unsigned NewOwnership = getOwnershipRule(Attributes);
if (ExistingOwnership && NewOwnership != ExistingOwnership) {
// If the ownership was written explicitly, complain.
if (getOwnershipRule(AttributesAsWritten)) {
Diag(AtLoc, diag::warn_property_attr_mismatch);
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
// Take the ownership from the original property.
Attributes = (Attributes & ~OwnershipMask) | ExistingOwnership;
}
// If the redeclaration is 'weak' but the original property is not,
// warn that the ownership is implicitly mismatched.
if ((Attributes & ObjCPropertyDecl::OBJC_PR_weak) &&
!(PIDecl->getPropertyAttributesAsWritten()
& ObjCPropertyDecl::OBJC_PR_weak) &&
PIDecl->getType()->getAs<ObjCObjectPointerType>() &&
PIDecl->getType().getObjCLifetime() == Qualifiers::OCL_None) {
Diag(AtLoc, diag::warn_property_implicitly_mismatched);
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
}
// Create a new ObjCPropertyDecl with the DeclContext being
// the class extension.
ObjCPropertyDecl *PDecl = CreatePropertyDecl(S, CDecl, AtLoc, LParenLoc,
FD, GetterSel, GetterNameLoc,
SetterSel, SetterNameLoc,
isReadWrite,
Attributes, AttributesAsWritten,
T, TSI, MethodImplKind, DC);
// If there was no declaration of a property with the same name in
// the primary class, we're done.
if (!PIDecl) {
ProcessPropertyDecl(PDecl);
return PDecl;
}
if (!Context.hasSameType(PIDecl->getType(), PDecl->getType())) {
bool IncompatibleObjC = false;
QualType ConvertedType;
// Relax the strict type matching for property type in continuation class.
// Allow property object type of continuation class to be different as long
// as it narrows the object type in its primary class property. Note that
// this conversion is safe only because the wider type is for a 'readonly'
// property in primary class and 'narrowed' type for a 'readwrite' property
// in continuation class.
QualType PrimaryClassPropertyT = Context.getCanonicalType(PIDecl->getType());
QualType ClassExtPropertyT = Context.getCanonicalType(PDecl->getType());
if (!isa<ObjCObjectPointerType>(PrimaryClassPropertyT) ||
!isa<ObjCObjectPointerType>(ClassExtPropertyT) ||
(!isObjCPointerConversion(ClassExtPropertyT, PrimaryClassPropertyT,
ConvertedType, IncompatibleObjC))
|| IncompatibleObjC) {
Diag(AtLoc,
diag::err_type_mismatch_continuation_class) << PDecl->getType();
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
}
// Check that atomicity of property in class extension matches the previous
// declaration.
checkAtomicPropertyMismatch(*this, PIDecl, PDecl, true);
// Make sure getter/setter are appropriately synthesized.
ProcessPropertyDecl(PDecl);
return PDecl;
}
ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
ObjCContainerDecl *CDecl,
SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
Selector GetterSel,
SourceLocation GetterNameLoc,
Selector SetterSel,
SourceLocation SetterNameLoc,
const bool isReadWrite,
const unsigned Attributes,
const unsigned AttributesAsWritten,
QualType T,
TypeSourceInfo *TInfo,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC){
IdentifierInfo *PropertyId = FD.D.getIdentifier();
// Property defaults to 'assign' if it is readwrite, unless this is ARC
// and the type is retainable.
bool isAssign;
if (Attributes & (ObjCDeclSpec::DQ_PR_assign |
ObjCDeclSpec::DQ_PR_unsafe_unretained)) {
isAssign = true;
} else if (getOwnershipRule(Attributes) || !isReadWrite) {
isAssign = false;
} else {
isAssign = (!getLangOpts().ObjCAutoRefCount ||
!T->isObjCRetainableType());
}
// Issue a warning if the property defaults to 'assign' and its object,
// which is GC'able, conforms to the NSCopying protocol.
if (getLangOpts().getGC() != LangOptions::NonGC &&
isAssign && !(Attributes & ObjCDeclSpec::DQ_PR_assign)) {
if (const ObjCObjectPointerType *ObjPtrTy =
T->getAs<ObjCObjectPointerType>()) {
ObjCInterfaceDecl *IDecl = ObjPtrTy->getObjectType()->getInterface();
if (IDecl)
if (ObjCProtocolDecl* PNSCopying =
LookupProtocol(&Context.Idents.get("NSCopying"), AtLoc))
if (IDecl->ClassImplementsProtocol(PNSCopying, true))
Diag(AtLoc, diag::warn_implements_nscopying) << PropertyId;
}
}
if (T->isObjCObjectType()) {
SourceLocation StarLoc = TInfo->getTypeLoc().getLocEnd();
StarLoc = getLocForEndOfToken(StarLoc);
Diag(FD.D.getIdentifierLoc(), diag::err_statically_allocated_object)
<< FixItHint::CreateInsertion(StarLoc, "*");
T = Context.getObjCObjectPointerType(T);
SourceLocation TLoc = TInfo->getTypeLoc().getLocStart();
TInfo = Context.getTrivialTypeSourceInfo(T, TLoc);
}
DeclContext *DC = cast<DeclContext>(CDecl);
ObjCPropertyDecl *PDecl = ObjCPropertyDecl::Create(Context, DC,
FD.D.getIdentifierLoc(),
PropertyId, AtLoc,
LParenLoc, T, TInfo);
bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
(Attributes & ObjCDeclSpec::DQ_PR_class);
// Class property and instance property can have the same name.
if (ObjCPropertyDecl *prevDecl = ObjCPropertyDecl::findPropertyDecl(
DC, PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty))) {
Diag(PDecl->getLocation(), diag::err_duplicate_property);
Diag(prevDecl->getLocation(), diag::note_property_declare);
PDecl->setInvalidDecl();
}
else {
DC->addDecl(PDecl);
if (lexicalDC)
PDecl->setLexicalDeclContext(lexicalDC);
}
if (T->isArrayType() || T->isFunctionType()) {
Diag(AtLoc, diag::err_property_type) << T;
PDecl->setInvalidDecl();
}
ProcessDeclAttributes(S, PDecl, FD.D);
// Regardless of setter/getter attribute, we save the default getter/setter
// selector names in anticipation of declaration of setter/getter methods.
PDecl->setGetterName(GetterSel, GetterNameLoc);
PDecl->setSetterName(SetterSel, SetterNameLoc);
PDecl->setPropertyAttributesAsWritten(
makePropertyAttributesAsWritten(AttributesAsWritten));
if (Attributes & ObjCDeclSpec::DQ_PR_readonly)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readonly);
if (Attributes & ObjCDeclSpec::DQ_PR_getter)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_getter);
if (Attributes & ObjCDeclSpec::DQ_PR_setter)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_setter);
if (isReadWrite)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readwrite);
if (Attributes & ObjCDeclSpec::DQ_PR_retain)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_retain);
if (Attributes & ObjCDeclSpec::DQ_PR_strong)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
if (Attributes & ObjCDeclSpec::DQ_PR_weak)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
if (Attributes & ObjCDeclSpec::DQ_PR_copy)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_copy);
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
if (isAssign)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_assign);
// In the semantic attributes, one of nonatomic or atomic is always set.
if (Attributes & ObjCDeclSpec::DQ_PR_nonatomic)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nonatomic);
else
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_atomic);
// 'unsafe_unretained' is an alias for 'assign'.
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_assign);
if (isAssign)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
if (MethodImplKind == tok::objc_required)
PDecl->setPropertyImplementation(ObjCPropertyDecl::Required);
else if (MethodImplKind == tok::objc_optional)
PDecl->setPropertyImplementation(ObjCPropertyDecl::Optional);
if (Attributes & ObjCDeclSpec::DQ_PR_nullability)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nullability);
if (Attributes & ObjCDeclSpec::DQ_PR_null_resettable)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_null_resettable);
if (Attributes & ObjCDeclSpec::DQ_PR_class)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_class);
return PDecl;
}
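// Under ARC the backing ivar's lifetime must agree with the property's
// implied ownership; e.g. (hypothetical):
//   @property (strong) id obj;
//   @synthesize obj = _obj;   // error if the ivar was declared __weak id _obj;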
static void checkARCPropertyImpl(Sema &S, SourceLocation propertyImplLoc,
ObjCPropertyDecl *property,
ObjCIvarDecl *ivar) {
if (property->isInvalidDecl() || ivar->isInvalidDecl()) return;
QualType ivarType = ivar->getType();
Qualifiers::ObjCLifetime ivarLifetime = ivarType.getObjCLifetime();
// The lifetime implied by the property's attributes.
Qualifiers::ObjCLifetime propertyLifetime =
getImpliedARCOwnership(property->getPropertyAttributes(),
property->getType());
// We're fine if they match.
if (propertyLifetime == ivarLifetime) return;
// None isn't a valid lifetime for an object ivar in ARC, and
// __autoreleasing is never valid; don't diagnose twice.
if ((ivarLifetime == Qualifiers::OCL_None &&
S.getLangOpts().ObjCAutoRefCount) ||
ivarLifetime == Qualifiers::OCL_Autoreleasing)
return;
// If the ivar is private, and it's implicitly __unsafe_unretained
// because of its type, then pretend it was actually implicitly
// __strong. This is only sound because we're processing the
// property implementation before parsing any method bodies.
if (ivarLifetime == Qualifiers::OCL_ExplicitNone &&
propertyLifetime == Qualifiers::OCL_Strong &&
ivar->getAccessControl() == ObjCIvarDecl::Private) {
SplitQualType split = ivarType.split();
if (split.Quals.hasObjCLifetime()) {
assert(ivarType->isObjCARCImplicitlyUnretainedType());
split.Quals.setObjCLifetime(Qualifiers::OCL_Strong);
ivarType = S.Context.getQualifiedType(split);
ivar->setType(ivarType);
return;
}
}
switch (propertyLifetime) {
case Qualifiers::OCL_Strong:
S.Diag(ivar->getLocation(), diag::err_arc_strong_property_ownership)
<< property->getDeclName()
<< ivar->getDeclName()
<< ivarLifetime;
break;
case Qualifiers::OCL_Weak:
S.Diag(ivar->getLocation(), diag::err_weak_property)
<< property->getDeclName()
<< ivar->getDeclName();
break;
case Qualifiers::OCL_ExplicitNone:
S.Diag(ivar->getLocation(), diag::err_arc_assign_property_ownership)
<< property->getDeclName()
<< ivar->getDeclName()
<< ((property->getPropertyAttributesAsWritten()
& ObjCPropertyDecl::OBJC_PR_assign) != 0);
break;
case Qualifiers::OCL_Autoreleasing:
llvm_unreachable("properties cannot be autoreleasing");
case Qualifiers::OCL_None:
// Any other property should be ignored.
return;
}
S.Diag(property->getLocation(), diag::note_property_declare);
if (propertyImplLoc.isValid())
S.Diag(propertyImplLoc, diag::note_property_synthesize);
}
/// setImpliedPropertyAttributeForReadOnlyProperty -
/// This routine evaluates the lifetime attributes for a 'readonly'
/// property with no known lifetime of its own, using the backing
/// ivar's attributes, if any. If there is no backing ivar, the
/// property's lifetime is assumed to be 'strong'.
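// e.g. (hypothetical): for
//   @property (readonly) id x;   // no ownership written
// backed by '__weak id _x;', the property is treated as 'weak'; with no
// backing ivar it is treated as 'strong'.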
static void setImpliedPropertyAttributeForReadOnlyProperty(
ObjCPropertyDecl *property, ObjCIvarDecl *ivar) {
Qualifiers::ObjCLifetime propertyLifetime =
getImpliedARCOwnership(property->getPropertyAttributes(),
property->getType());
if (propertyLifetime != Qualifiers::OCL_None)
return;
if (!ivar) {
// if no backing ivar, make property 'strong'.
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
return;
}
// The property assumes the ownership of its backing ivar.
QualType ivarType = ivar->getType();
Qualifiers::ObjCLifetime ivarLifetime = ivarType.getObjCLifetime();
if (ivarLifetime == Qualifiers::OCL_Strong)
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
else if (ivarLifetime == Qualifiers::OCL_Weak)
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
}
static bool
isIncompatiblePropertyAttribute(unsigned Attr1, unsigned Attr2,
ObjCPropertyDecl::PropertyAttributeKind Kind) {
return (Attr1 & Kind) != (Attr2 & Kind);
}
static bool areIncompatiblePropertyAttributes(unsigned Attr1, unsigned Attr2,
unsigned Kinds) {
return ((Attr1 & Kinds) != 0) != ((Attr2 & Kinds) != 0);
}
/// SelectPropertyForSynthesisFromProtocols - Finds the most appropriate
/// property declaration that should be synthesised in all of the inherited
/// protocols. It also diagnoses properties declared in inherited protocols with
/// mismatched types or attributes, since any of them can be candidate for
/// synthesis.
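// e.g. (hypothetical): if protocol P1 declares
//   @property (readonly) id x;
// and protocol P2 declares
//   @property (readwrite) id x;
// the readwrite declaration is preferred for synthesis, and other
// mismatched redeclarations are diagnosed against it.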
static ObjCPropertyDecl *
SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc,
ObjCInterfaceDecl *ClassDecl,
ObjCPropertyDecl *Property) {
assert(isa<ObjCProtocolDecl>(Property->getDeclContext()) &&
"Expected a property from a protocol");
ObjCInterfaceDecl::ProtocolPropertySet ProtocolSet;
ObjCInterfaceDecl::PropertyDeclOrder Properties;
for (const auto *PI : ClassDecl->all_referenced_protocols()) {
if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
Properties);
}
if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass()) {
while (SDecl) {
for (const auto *PI : SDecl->all_referenced_protocols()) {
if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
Properties);
}
SDecl = SDecl->getSuperClass();
}
}
if (Properties.empty())
return Property;
ObjCPropertyDecl *OriginalProperty = Property;
size_t SelectedIndex = 0;
for (const auto &Prop : llvm::enumerate(Properties)) {
// Select the 'readwrite' property if such property exists.
if (Property->isReadOnly() && !Prop.value()->isReadOnly()) {
Property = Prop.value();
SelectedIndex = Prop.index();
}
}
if (Property != OriginalProperty) {
// Check that the old property is compatible with the new one.
Properties[SelectedIndex] = OriginalProperty;
}
QualType RHSType = S.Context.getCanonicalType(Property->getType());
- unsigned OriginalAttributes = Property->getPropertyAttributes();
+ unsigned OriginalAttributes = Property->getPropertyAttributesAsWritten();
enum MismatchKind {
IncompatibleType = 0,
HasNoExpectedAttribute,
HasUnexpectedAttribute,
DifferentGetter,
DifferentSetter
};
// Represents a property from another protocol that conflicts with the
// selected declaration.
struct MismatchingProperty {
const ObjCPropertyDecl *Prop;
MismatchKind Kind;
StringRef AttributeName;
};
SmallVector<MismatchingProperty, 4> Mismatches;
for (ObjCPropertyDecl *Prop : Properties) {
// Verify the property attributes.
- unsigned Attr = Prop->getPropertyAttributes();
+ unsigned Attr = Prop->getPropertyAttributesAsWritten();
if (Attr != OriginalAttributes) {
auto Diag = [&](bool OriginalHasAttribute, StringRef AttributeName) {
MismatchKind Kind = OriginalHasAttribute ? HasNoExpectedAttribute
: HasUnexpectedAttribute;
Mismatches.push_back({Prop, Kind, AttributeName});
};
if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
ObjCPropertyDecl::OBJC_PR_copy)) {
Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_copy, "copy");
continue;
}
if (areIncompatiblePropertyAttributes(
OriginalAttributes, Attr, ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong)) {
Diag(OriginalAttributes & (ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong),
"retain (or strong)");
continue;
}
if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
ObjCPropertyDecl::OBJC_PR_atomic)) {
Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_atomic, "atomic");
continue;
}
}
if (Property->getGetterName() != Prop->getGetterName()) {
Mismatches.push_back({Prop, DifferentGetter, ""});
continue;
}
if (!Property->isReadOnly() && !Prop->isReadOnly() &&
Property->getSetterName() != Prop->getSetterName()) {
Mismatches.push_back({Prop, DifferentSetter, ""});
continue;
}
QualType LHSType = S.Context.getCanonicalType(Prop->getType());
if (!S.Context.propertyTypesAreCompatible(LHSType, RHSType)) {
bool IncompatibleObjC = false;
QualType ConvertedType;
if (!S.isObjCPointerConversion(RHSType, LHSType, ConvertedType, IncompatibleObjC)
|| IncompatibleObjC) {
Mismatches.push_back({Prop, IncompatibleType, ""});
continue;
}
}
}
if (Mismatches.empty())
return Property;
// Diagnose the incompatibility.
{
bool HasIncompatibleAttributes = false;
for (const auto &Note : Mismatches)
HasIncompatibleAttributes =
Note.Kind != IncompatibleType ? true : HasIncompatibleAttributes;
// Promote the warning to an error if there are incompatible attributes or
// incompatible types together with readwrite/readonly incompatibility.
auto Diag = S.Diag(Property->getLocation(),
Property != OriginalProperty || HasIncompatibleAttributes
? diag::err_protocol_property_mismatch
: diag::warn_protocol_property_mismatch);
Diag << Mismatches[0].Kind;
switch (Mismatches[0].Kind) {
case IncompatibleType:
Diag << Property->getType();
break;
case HasNoExpectedAttribute:
case HasUnexpectedAttribute:
Diag << Mismatches[0].AttributeName;
break;
case DifferentGetter:
Diag << Property->getGetterName();
break;
case DifferentSetter:
Diag << Property->getSetterName();
break;
}
}
for (const auto &Note : Mismatches) {
auto Diag =
S.Diag(Note.Prop->getLocation(), diag::note_protocol_property_declare)
<< Note.Kind;
switch (Note.Kind) {
case IncompatibleType:
Diag << Note.Prop->getType();
break;
case HasNoExpectedAttribute:
case HasUnexpectedAttribute:
Diag << Note.AttributeName;
break;
case DifferentGetter:
Diag << Note.Prop->getGetterName();
break;
case DifferentSetter:
Diag << Note.Prop->getSetterName();
break;
}
}
if (AtLoc.isValid())
S.Diag(AtLoc, diag::note_property_synthesize);
return Property;
}
/// Determine whether any storage attributes were written on the property.
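// e.g. (hypothetical): 'strong' below is a written storage attribute,
// whether it appears on the primary declaration or on a readwrite
// redeclaration in a class extension:
//   @property (strong, readonly) id x;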
static bool hasWrittenStorageAttribute(ObjCPropertyDecl *Prop,
ObjCPropertyQueryKind QueryKind) {
if (Prop->getPropertyAttributesAsWritten() & OwnershipMask) return true;
// If this is a readwrite property in a class extension that refines
// a readonly property in the original class definition, check it as
// well.
// If it's a readonly property, we're not interested.
if (Prop->isReadOnly()) return false;
// Is it declared in an extension?
auto Category = dyn_cast<ObjCCategoryDecl>(Prop->getDeclContext());
if (!Category || !Category->IsClassExtension()) return false;
// Find the corresponding property in the primary class definition.
auto OrigClass = Category->getClassInterface();
for (auto Found : OrigClass->lookup(Prop->getDeclName())) {
if (ObjCPropertyDecl *OrigProp = dyn_cast<ObjCPropertyDecl>(Found))
return OrigProp->getPropertyAttributesAsWritten() & OwnershipMask;
}
// Look through all of the protocols.
for (const auto *Proto : OrigClass->all_referenced_protocols()) {
if (ObjCPropertyDecl *OrigProp = Proto->FindPropertyDeclaration(
Prop->getIdentifier(), QueryKind))
return OrigProp->getPropertyAttributesAsWritten() & OwnershipMask;
}
return false;
}
/// ActOnPropertyImplDecl - This routine performs semantic checks and
/// builds the AST node for a property implementation declaration; declared
/// as \@synthesize or \@dynamic.
///
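// Illustrative (hypothetical) declarations handled here:
//   @implementation Foo
//   @synthesize name = _name;   // back property 'name' with ivar '_name'
//   @dynamic title;             // accessors are provided elsewhere
//   @end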
Decl *Sema::ActOnPropertyImplDecl(Scope *S,
SourceLocation AtLoc,
SourceLocation PropertyLoc,
bool Synthesize,
IdentifierInfo *PropertyId,
IdentifierInfo *PropertyIvar,
SourceLocation PropertyIvarLoc,
ObjCPropertyQueryKind QueryKind) {
ObjCContainerDecl *ClassImpDecl =
dyn_cast<ObjCContainerDecl>(CurContext);
// Make sure we have a context for the property implementation declaration.
if (!ClassImpDecl) {
Diag(AtLoc, diag::err_missing_property_context);
return nullptr;
}
if (PropertyIvarLoc.isInvalid())
PropertyIvarLoc = PropertyLoc;
SourceLocation PropertyDiagLoc = PropertyLoc;
if (PropertyDiagLoc.isInvalid())
PropertyDiagLoc = ClassImpDecl->getLocStart();
ObjCPropertyDecl *property = nullptr;
ObjCInterfaceDecl *IDecl = nullptr;
// Find the class or category class where this property must have
// a declaration.
ObjCImplementationDecl *IC = nullptr;
ObjCCategoryImplDecl *CatImplClass = nullptr;
if ((IC = dyn_cast<ObjCImplementationDecl>(ClassImpDecl))) {
IDecl = IC->getClassInterface();
// We always synthesize an interface for an implementation
// without an interface decl. So, IDecl is always non-zero.
assert(IDecl &&
"ActOnPropertyImplDecl - @implementation without @interface");
// Look for this property declaration in the @implementation's @interface
property = IDecl->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::err_bad_property_decl) << IDecl->getDeclName();
return nullptr;
}
if (property->isClassProperty() && Synthesize) {
Diag(PropertyLoc, diag::err_synthesize_on_class_property) << PropertyId;
return nullptr;
}
unsigned PIkind = property->getPropertyAttributesAsWritten();
if ((PIkind & (ObjCPropertyDecl::OBJC_PR_atomic |
ObjCPropertyDecl::OBJC_PR_nonatomic) ) == 0) {
if (AtLoc.isValid())
Diag(AtLoc, diag::warn_implicit_atomic_property);
else
Diag(IC->getLocation(), diag::warn_auto_implicit_atomic_property);
Diag(property->getLocation(), diag::note_property_declare);
}
if (const ObjCCategoryDecl *CD =
dyn_cast<ObjCCategoryDecl>(property->getDeclContext())) {
if (!CD->IsClassExtension()) {
Diag(PropertyLoc, diag::err_category_property) << CD->getDeclName();
Diag(property->getLocation(), diag::note_property_declare);
return nullptr;
}
}
if (Synthesize&&
(PIkind & ObjCPropertyDecl::OBJC_PR_readonly) &&
property->hasAttr<IBOutletAttr>() &&
!AtLoc.isValid()) {
bool ReadWriteProperty = false;
// Search the class extensions to see if the 'readonly' property is
// redeclared 'readwrite'; if so, no warning is issued.
for (auto *Ext : IDecl->known_extensions()) {
DeclContext::lookup_result R = Ext->lookup(property->getDeclName());
if (!R.empty())
if (ObjCPropertyDecl *ExtProp = dyn_cast<ObjCPropertyDecl>(R[0])) {
PIkind = ExtProp->getPropertyAttributesAsWritten();
if (PIkind & ObjCPropertyDecl::OBJC_PR_readwrite) {
ReadWriteProperty = true;
break;
}
}
}
if (!ReadWriteProperty) {
Diag(property->getLocation(), diag::warn_auto_readonly_iboutlet_property)
<< property;
SourceLocation readonlyLoc;
if (LocPropertyAttribute(Context, "readonly",
property->getLParenLoc(), readonlyLoc)) {
SourceLocation endLoc =
readonlyLoc.getLocWithOffset(strlen("readonly")-1);
SourceRange ReadonlySourceRange(readonlyLoc, endLoc);
Diag(property->getLocation(),
diag::note_auto_readonly_iboutlet_fixup_suggest) <<
FixItHint::CreateReplacement(ReadonlySourceRange, "readwrite");
}
}
}
if (Synthesize && isa<ObjCProtocolDecl>(property->getDeclContext()))
property = SelectPropertyForSynthesisFromProtocols(*this, AtLoc, IDecl,
property);
} else if ((CatImplClass = dyn_cast<ObjCCategoryImplDecl>(ClassImpDecl))) {
if (Synthesize) {
Diag(AtLoc, diag::err_synthesize_category_decl);
return nullptr;
}
IDecl = CatImplClass->getClassInterface();
if (!IDecl) {
Diag(AtLoc, diag::err_missing_property_interface);
return nullptr;
}
ObjCCategoryDecl *Category =
IDecl->FindCategoryDeclaration(CatImplClass->getIdentifier());
// If the category for this implementation was not found, it is an error
// which has already been reported earlier.
if (!Category)
return nullptr;
// Look for this property declaration in @implementation's category
property = Category->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::err_bad_category_property_decl)
<< Category->getDeclName();
return nullptr;
}
} else {
Diag(AtLoc, diag::err_bad_property_context);
return nullptr;
}
ObjCIvarDecl *Ivar = nullptr;
bool CompleteTypeErr = false;
bool compat = true;
// Check that we have a valid, previously declared ivar for @synthesize
if (Synthesize) {
// @synthesize
if (!PropertyIvar)
PropertyIvar = PropertyId;
// Check that this is a previously declared 'ivar' in 'IDecl' interface
ObjCInterfaceDecl *ClassDeclared;
Ivar = IDecl->lookupInstanceVariable(PropertyIvar, ClassDeclared);
QualType PropType = property->getType();
QualType PropertyIvarType = PropType.getNonReferenceType();
if (RequireCompleteType(PropertyDiagLoc, PropertyIvarType,
diag::err_incomplete_synthesized_property,
property->getDeclName())) {
Diag(property->getLocation(), diag::note_property_declare);
CompleteTypeErr = true;
}
if (getLangOpts().ObjCAutoRefCount &&
(property->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_readonly) &&
PropertyIvarType->isObjCRetainableType()) {
setImpliedPropertyAttributeForReadOnlyProperty(property, Ivar);
}
ObjCPropertyDecl::PropertyAttributeKind kind
= property->getPropertyAttributes();
bool isARCWeak = false;
if (kind & ObjCPropertyDecl::OBJC_PR_weak) {
// Add GC __weak to the ivar type if the property is weak.
if (getLangOpts().getGC() != LangOptions::NonGC) {
assert(!getLangOpts().ObjCAutoRefCount);
if (PropertyIvarType.isObjCGCStrong()) {
Diag(PropertyDiagLoc, diag::err_gc_weak_property_strong_type);
Diag(property->getLocation(), diag::note_property_declare);
} else {
PropertyIvarType =
Context.getObjCGCQualType(PropertyIvarType, Qualifiers::Weak);
}
// Otherwise, check whether ARC __weak is enabled and works with
// the property type.
} else {
if (!getLangOpts().ObjCWeak) {
// Only complain here when synthesizing an ivar.
if (!Ivar) {
Diag(PropertyDiagLoc,
getLangOpts().ObjCWeakRuntime
? diag::err_synthesizing_arc_weak_property_disabled
: diag::err_synthesizing_arc_weak_property_no_runtime);
Diag(property->getLocation(), diag::note_property_declare);
}
CompleteTypeErr = true; // suppress later diagnostics about the ivar
} else {
isARCWeak = true;
if (const ObjCObjectPointerType *ObjT =
PropertyIvarType->getAs<ObjCObjectPointerType>()) {
const ObjCInterfaceDecl *ObjI = ObjT->getInterfaceDecl();
if (ObjI && ObjI->isArcWeakrefUnavailable()) {
Diag(property->getLocation(),
diag::err_arc_weak_unavailable_property)
<< PropertyIvarType;
Diag(ClassImpDecl->getLocation(), diag::note_implemented_by_class)
<< ClassImpDecl->getName();
}
}
}
}
}
if (AtLoc.isInvalid()) {
// When default-synthesizing a property, check whether there is an ivar
// matching the property name and issue a warning, since this is the most
// common case of an ivar that was meant to back the property (as it would
// in the non-default synthesis case) going unused.
ObjCInterfaceDecl *ClassDeclared=nullptr;
ObjCIvarDecl *originalIvar =
IDecl->lookupInstanceVariable(property->getIdentifier(),
ClassDeclared);
if (originalIvar) {
Diag(PropertyDiagLoc,
diag::warn_autosynthesis_property_ivar_match)
<< PropertyId << (Ivar == nullptr) << PropertyIvar
<< originalIvar->getIdentifier();
Diag(property->getLocation(), diag::note_property_declare);
Diag(originalIvar->getLocation(), diag::note_ivar_decl);
}
}
if (!Ivar) {
// In ARC, give the ivar a lifetime qualifier based on the
// property attributes.
if ((getLangOpts().ObjCAutoRefCount || isARCWeak) &&
!PropertyIvarType.getObjCLifetime() &&
PropertyIvarType->isObjCRetainableType()) {
// It's an error if we have to do this and the user didn't
// explicitly write an ownership attribute on the property.
if (!hasWrittenStorageAttribute(property, QueryKind) &&
!(kind & ObjCPropertyDecl::OBJC_PR_strong)) {
Diag(PropertyDiagLoc,
diag::err_arc_objc_property_default_assign_on_object);
Diag(property->getLocation(), diag::note_property_declare);
} else {
Qualifiers::ObjCLifetime lifetime =
getImpliedARCOwnership(kind, PropertyIvarType);
assert(lifetime && "no lifetime for property?");
Qualifiers qs;
qs.addObjCLifetime(lifetime);
PropertyIvarType = Context.getQualifiedType(PropertyIvarType, qs);
}
}
Ivar = ObjCIvarDecl::Create(Context, ClassImpDecl,
PropertyIvarLoc,PropertyIvarLoc, PropertyIvar,
PropertyIvarType, /*Dinfo=*/nullptr,
ObjCIvarDecl::Private,
(Expr *)nullptr, true);
if (RequireNonAbstractType(PropertyIvarLoc,
PropertyIvarType,
diag::err_abstract_type_in_decl,
AbstractSynthesizedIvarType)) {
Diag(property->getLocation(), diag::note_property_declare);
// An abstract type is as bad as an incomplete type.
CompleteTypeErr = true;
}
if (CompleteTypeErr)
Ivar->setInvalidDecl();
ClassImpDecl->addDecl(Ivar);
IDecl->makeDeclVisibleInContext(Ivar);
if (getLangOpts().ObjCRuntime.isFragile())
Diag(PropertyDiagLoc, diag::err_missing_property_ivar_decl)
<< PropertyId;
// Note! I deliberately want it to fall through so that we have a
// property implementation and to avoid future warnings.
} else if (getLangOpts().ObjCRuntime.isNonFragile() &&
!declaresSameEntity(ClassDeclared, IDecl)) {
Diag(PropertyDiagLoc, diag::err_ivar_in_superclass_use)
<< property->getDeclName() << Ivar->getDeclName()
<< ClassDeclared->getDeclName();
Diag(Ivar->getLocation(), diag::note_previous_access_declaration)
<< Ivar << Ivar->getName();
// Note! I deliberately want it to fall through so more errors are caught.
}
property->setPropertyIvarDecl(Ivar);
QualType IvarType = Context.getCanonicalType(Ivar->getType());
// Check that type of property and its ivar are type compatible.
if (!Context.hasSameType(PropertyIvarType, IvarType)) {
if (isa<ObjCObjectPointerType>(PropertyIvarType)
&& isa<ObjCObjectPointerType>(IvarType))
compat =
Context.canAssignObjCInterfaces(
PropertyIvarType->getAs<ObjCObjectPointerType>(),
IvarType->getAs<ObjCObjectPointerType>());
else {
compat = (CheckAssignmentConstraints(PropertyIvarLoc, PropertyIvarType,
IvarType)
== Compatible);
}
if (!compat) {
Diag(PropertyDiagLoc, diag::err_property_ivar_type)
<< property->getDeclName() << PropType
<< Ivar->getDeclName() << IvarType;
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Note! I deliberately want it to fall through so that we have a
// property implementation and to avoid future warnings.
}
else {
// FIXME! Rules for properties are somewhat different from those
// for assignments. Use a new routine to consolidate all cases;
// specifically for property redeclarations as well as for ivars.
QualType lhsType =Context.getCanonicalType(PropertyIvarType).getUnqualifiedType();
QualType rhsType =Context.getCanonicalType(IvarType).getUnqualifiedType();
if (lhsType != rhsType &&
lhsType->isArithmeticType()) {
Diag(PropertyDiagLoc, diag::err_property_ivar_type)
<< property->getDeclName() << PropType
<< Ivar->getDeclName() << IvarType;
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Fall thru - see previous comment
}
}
// __weak is explicit, so checking the canonical type works.
if ((PropType.isObjCGCWeak() && !IvarType.isObjCGCWeak() &&
getLangOpts().getGC() != LangOptions::NonGC)) {
Diag(PropertyDiagLoc, diag::err_weak_property)
<< property->getDeclName() << Ivar->getDeclName();
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Fall thru - see previous comment
}
// Fall thru - see previous comment
if ((property->getType()->isObjCObjectPointerType() ||
PropType.isObjCGCStrong()) && IvarType.isObjCGCWeak() &&
getLangOpts().getGC() != LangOptions::NonGC) {
Diag(PropertyDiagLoc, diag::err_strong_property)
<< property->getDeclName() << Ivar->getDeclName();
// Fall thru - see previous comment
}
}
if (getLangOpts().ObjCAutoRefCount || isARCWeak ||
Ivar->getType().getObjCLifetime())
checkARCPropertyImpl(*this, PropertyLoc, property, Ivar);
} else if (PropertyIvar)
// @dynamic
Diag(PropertyDiagLoc, diag::err_dynamic_property_ivar_decl);
assert (property && "ActOnPropertyImplDecl - property declaration missing");
ObjCPropertyImplDecl *PIDecl =
ObjCPropertyImplDecl::Create(Context, CurContext, AtLoc, PropertyLoc,
property,
(Synthesize ?
ObjCPropertyImplDecl::Synthesize
: ObjCPropertyImplDecl::Dynamic),
Ivar, PropertyIvarLoc);
if (CompleteTypeErr || !compat)
PIDecl->setInvalidDecl();
if (ObjCMethodDecl *getterMethod = property->getGetterMethodDecl()) {
getterMethod->createImplicitParams(Context, IDecl);
if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr &&
Ivar->getType()->isRecordType()) {
// For Objective-C++, we need to synthesize the AST for the ivar object to be
// returned by the getter, as it must conform to C++'s copy-return rules.
// FIXME. Eventually we want to do this for Objective-C as well.
SynthesizedFunctionScope Scope(*this, getterMethod);
ImplicitParamDecl *SelfDecl = getterMethod->getSelfDecl();
DeclRefExpr *SelfExpr =
new (Context) DeclRefExpr(SelfDecl, false, SelfDecl->getType(),
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(SelfExpr);
Expr *LoadSelfExpr =
ImplicitCastExpr::Create(Context, SelfDecl->getType(),
CK_LValueToRValue, SelfExpr, nullptr,
VK_RValue);
Expr *IvarRefExpr =
new (Context) ObjCIvarRefExpr(Ivar,
Ivar->getUsageType(SelfDecl->getType()),
PropertyDiagLoc,
Ivar->getLocation(),
LoadSelfExpr, true, true);
ExprResult Res = PerformCopyInitialization(
InitializedEntity::InitializeResult(PropertyDiagLoc,
getterMethod->getReturnType(),
/*NRVO=*/false),
PropertyDiagLoc, IvarRefExpr);
if (!Res.isInvalid()) {
Expr *ResExpr = Res.getAs<Expr>();
if (ResExpr)
ResExpr = MaybeCreateExprWithCleanups(ResExpr);
PIDecl->setGetterCXXConstructor(ResExpr);
}
}
if (property->hasAttr<NSReturnsNotRetainedAttr>() &&
!getterMethod->hasAttr<NSReturnsNotRetainedAttr>()) {
Diag(getterMethod->getLocation(),
diag::warn_property_getter_owning_mismatch);
Diag(property->getLocation(), diag::note_property_declare);
}
if (getLangOpts().ObjCAutoRefCount && Synthesize)
switch (getterMethod->getMethodFamily()) {
case OMF_retain:
case OMF_retainCount:
case OMF_release:
case OMF_autorelease:
Diag(getterMethod->getLocation(), diag::err_arc_illegal_method_def)
<< 1 << getterMethod->getSelector();
break;
default:
break;
}
}
if (ObjCMethodDecl *setterMethod = property->getSetterMethodDecl()) {
setterMethod->createImplicitParams(Context, IDecl);
if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr &&
Ivar->getType()->isRecordType()) {
// FIXME. Eventually we want to do this for Objective-C as well.
SynthesizedFunctionScope Scope(*this, setterMethod);
ImplicitParamDecl *SelfDecl = setterMethod->getSelfDecl();
DeclRefExpr *SelfExpr =
new (Context) DeclRefExpr(SelfDecl, false, SelfDecl->getType(),
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(SelfExpr);
Expr *LoadSelfExpr =
ImplicitCastExpr::Create(Context, SelfDecl->getType(),
CK_LValueToRValue, SelfExpr, nullptr,
VK_RValue);
Expr *lhs =
new (Context) ObjCIvarRefExpr(Ivar,
Ivar->getUsageType(SelfDecl->getType()),
PropertyDiagLoc,
Ivar->getLocation(),
LoadSelfExpr, true, true);
ObjCMethodDecl::param_iterator P = setterMethod->param_begin();
ParmVarDecl *Param = (*P);
QualType T = Param->getType().getNonReferenceType();
DeclRefExpr *rhs = new (Context) DeclRefExpr(Param, false, T,
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(rhs);
ExprResult Res = BuildBinOp(S, PropertyDiagLoc,
BO_Assign, lhs, rhs);
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_atomic) {
Expr *callExpr = Res.getAs<Expr>();
if (const CXXOperatorCallExpr *CXXCE =
dyn_cast_or_null<CXXOperatorCallExpr>(callExpr))
if (const FunctionDecl *FuncDecl = CXXCE->getDirectCallee())
if (!FuncDecl->isTrivial())
if (property->getType()->isReferenceType()) {
Diag(PropertyDiagLoc,
diag::err_atomic_property_nontrivial_assign_op)
<< property->getType();
Diag(FuncDecl->getLocStart(),
diag::note_callee_decl) << FuncDecl;
}
}
PIDecl->setSetterCXXAssignment(Res.getAs<Expr>());
}
}
if (IC) {
if (Synthesize)
if (ObjCPropertyImplDecl *PPIDecl =
IC->FindPropertyImplIvarDecl(PropertyIvar)) {
Diag(PropertyLoc, diag::err_duplicate_ivar_use)
<< PropertyId << PPIDecl->getPropertyDecl()->getIdentifier()
<< PropertyIvar;
Diag(PPIDecl->getLocation(), diag::note_previous_use);
}
if (ObjCPropertyImplDecl *PPIDecl
= IC->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyLoc, diag::err_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
}
IC->addPropertyImplementation(PIDecl);
if (getLangOpts().ObjCDefaultSynthProperties &&
getLangOpts().ObjCRuntime.isNonFragile() &&
!IDecl->isObjCRequiresPropertyDefs()) {
// Diagnose if an ivar was lazily synthesized due to a previous
// use and if 1) the property is @dynamic or 2) the property is
// synthesized but requires an ivar of a different name.
ObjCInterfaceDecl *ClassDeclared = nullptr;
ObjCIvarDecl *Ivar = nullptr;
if (!Synthesize)
Ivar = IDecl->lookupInstanceVariable(PropertyId, ClassDeclared);
else {
if (PropertyIvar && PropertyIvar != PropertyId)
Ivar = IDecl->lookupInstanceVariable(PropertyId, ClassDeclared);
}
// Issue diagnostics only if Ivar belongs to current class.
if (Ivar && Ivar->getSynthesize() &&
declaresSameEntity(IC->getClassInterface(), ClassDeclared)) {
Diag(Ivar->getLocation(), diag::err_undeclared_var_use)
<< PropertyId;
Ivar->setInvalidDecl();
}
}
} else {
if (Synthesize)
if (ObjCPropertyImplDecl *PPIDecl =
CatImplClass->FindPropertyImplIvarDecl(PropertyIvar)) {
Diag(PropertyDiagLoc, diag::err_duplicate_ivar_use)
<< PropertyId << PPIDecl->getPropertyDecl()->getIdentifier()
<< PropertyIvar;
Diag(PPIDecl->getLocation(), diag::note_previous_use);
}
if (ObjCPropertyImplDecl *PPIDecl =
CatImplClass->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyDiagLoc, diag::err_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
}
CatImplClass->addPropertyImplementation(PIDecl);
}
return PIDecl;
}
//===----------------------------------------------------------------------===//
// Helper methods.
//===----------------------------------------------------------------------===//
/// DiagnosePropertyMismatch - Compares two properties for their
/// attributes and types and warns on a variety of inconsistencies.
///
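/// An illustrative sketch (hypothetical declarations, not from this file):
///
///   @interface Base
///   @property (copy) NSString *title;
///   @end
///
///   @interface Sub : Base
///   @property (retain) NSString *title; // warns: 'copy' attribute of
///                                       // 'title' does not match the
///                                       // property inherited from 'Base'
///   @end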
void
Sema::DiagnosePropertyMismatch(ObjCPropertyDecl *Property,
ObjCPropertyDecl *SuperProperty,
const IdentifierInfo *inheritedName,
bool OverridingProtocolProperty) {
ObjCPropertyDecl::PropertyAttributeKind CAttr =
Property->getPropertyAttributes();
ObjCPropertyDecl::PropertyAttributeKind SAttr =
SuperProperty->getPropertyAttributes();
// We allow readonly properties without an explicit ownership
// (assign/unsafe_unretained/weak/retain/strong/copy) in super class
// to be overridden by a property with any explicit ownership in the subclass.
if (!OverridingProtocolProperty &&
!getOwnershipRule(SAttr) && getOwnershipRule(CAttr))
;
else {
if ((CAttr & ObjCPropertyDecl::OBJC_PR_readonly)
&& (SAttr & ObjCPropertyDecl::OBJC_PR_readwrite))
Diag(Property->getLocation(), diag::warn_readonly_property)
<< Property->getDeclName() << inheritedName;
if ((CAttr & ObjCPropertyDecl::OBJC_PR_copy)
!= (SAttr & ObjCPropertyDecl::OBJC_PR_copy))
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "copy" << inheritedName;
else if (!(SAttr & ObjCPropertyDecl::OBJC_PR_readonly)){
unsigned CAttrRetain =
(CAttr &
(ObjCPropertyDecl::OBJC_PR_retain | ObjCPropertyDecl::OBJC_PR_strong));
unsigned SAttrRetain =
(SAttr &
(ObjCPropertyDecl::OBJC_PR_retain | ObjCPropertyDecl::OBJC_PR_strong));
bool CStrong = (CAttrRetain != 0);
bool SStrong = (SAttrRetain != 0);
if (CStrong != SStrong)
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "retain (or strong)" << inheritedName;
}
}
// Check for nonatomic; note that nonatomic is effectively
// meaningless for readonly properties, so don't diagnose if the
// atomic property is 'readonly'.
checkAtomicPropertyMismatch(*this, SuperProperty, Property, false);
if (Property->getSetterName() != SuperProperty->getSetterName()) {
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "setter" << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
if (Property->getGetterName() != SuperProperty->getGetterName()) {
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "getter" << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
QualType LHSType =
Context.getCanonicalType(SuperProperty->getType());
QualType RHSType =
Context.getCanonicalType(Property->getType());
if (!Context.propertyTypesAreCompatible(LHSType, RHSType)) {
// Handle cases not covered above.
// FIXME. For future support of covariant property types, revisit this.
bool IncompatibleObjC = false;
QualType ConvertedType;
if (!isObjCPointerConversion(RHSType, LHSType,
ConvertedType, IncompatibleObjC) ||
IncompatibleObjC) {
Diag(Property->getLocation(), diag::warn_property_types_are_incompatible)
<< Property->getType() << SuperProperty->getType() << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
}
}
bool Sema::DiagnosePropertyAccessorMismatch(ObjCPropertyDecl *property,
ObjCMethodDecl *GetterMethod,
SourceLocation Loc) {
if (!GetterMethod)
return false;
QualType GetterType = GetterMethod->getReturnType().getNonReferenceType();
QualType PropertyRValueType =
property->getType().getNonReferenceType().getAtomicUnqualifiedType();
bool compat = Context.hasSameType(PropertyRValueType, GetterType);
if (!compat) {
const ObjCObjectPointerType *propertyObjCPtr = nullptr;
const ObjCObjectPointerType *getterObjCPtr = nullptr;
if ((propertyObjCPtr =
PropertyRValueType->getAs<ObjCObjectPointerType>()) &&
(getterObjCPtr = GetterType->getAs<ObjCObjectPointerType>()))
compat = Context.canAssignObjCInterfaces(getterObjCPtr, propertyObjCPtr);
else if (CheckAssignmentConstraints(Loc, GetterType, PropertyRValueType)
!= Compatible) {
Diag(Loc, diag::err_property_accessor_type)
<< property->getDeclName() << PropertyRValueType
<< GetterMethod->getSelector() << GetterType;
Diag(GetterMethod->getLocation(), diag::note_declared_at);
return true;
} else {
compat = true;
QualType lhsType = Context.getCanonicalType(PropertyRValueType);
QualType rhsType = Context.getCanonicalType(GetterType).getUnqualifiedType();
if (lhsType != rhsType && lhsType->isArithmeticType())
compat = false;
}
}
if (!compat) {
Diag(Loc, diag::warn_accessor_property_type_mismatch)
<< property->getDeclName()
<< GetterMethod->getSelector();
Diag(GetterMethod->getLocation(), diag::note_declared_at);
return true;
}
return false;
}
/// CollectImmediateProperties - This routine collects all properties in
/// the class and its conforming protocols, but not those in its super class.
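/// For example (sketch): given "@interface I : Super <P> ... @end", this
/// collects the properties declared in I, in I's visible extensions, and in
/// protocol P, but not those declared only in Super.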
static void
CollectImmediateProperties(ObjCContainerDecl *CDecl,
ObjCContainerDecl::PropertyMap &PropMap,
ObjCContainerDecl::PropertyMap &SuperPropMap,
bool CollectClassPropsOnly = false,
bool IncludeProtocols = true) {
if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
for (auto *Prop : IDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
Prop;
}
// Collect the properties from visible extensions.
for (auto *Ext : IDecl->visible_extensions())
CollectImmediateProperties(Ext, PropMap, SuperPropMap,
CollectClassPropsOnly, IncludeProtocols);
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : IDecl->all_referenced_protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
if (ObjCCategoryDecl *CATDecl = dyn_cast<ObjCCategoryDecl>(CDecl)) {
for (auto *Prop : CATDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
Prop;
}
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : CATDecl->protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
else if (ObjCProtocolDecl *PDecl = dyn_cast<ObjCProtocolDecl>(CDecl)) {
for (auto *Prop : PDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
ObjCPropertyDecl *PropertyFromSuper =
SuperPropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
// Exclude properties declared in protocols that the class's super-class
// also conforms to, as the super-class has to implement the property.
if (!PropertyFromSuper ||
PropertyFromSuper->getIdentifier() != Prop->getIdentifier()) {
ObjCPropertyDecl *&PropEntry =
PropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
if (!PropEntry)
PropEntry = Prop;
}
}
// Scan through protocol's protocols.
for (auto *PI : PDecl->protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
/// CollectSuperClassPropertyImplementations - This routine collects the list
/// of properties to be implemented in the super class(es), including those
/// coming from their conforming protocols.
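/// For example (sketch): for "@interface Sub : Base <P> @end" where Base
/// adopts protocol Q, this walks Base and its ancestors, gathering the
/// properties each of them must implement (including those from Q).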
static void CollectSuperClassPropertyImplementations(ObjCInterfaceDecl *CDecl,
ObjCInterfaceDecl::PropertyMap &PropMap) {
if (ObjCInterfaceDecl *SDecl = CDecl->getSuperClass()) {
ObjCInterfaceDecl::PropertyDeclOrder PO;
while (SDecl) {
SDecl->collectPropertiesToImplement(PropMap, PO);
SDecl = SDecl->getSuperClass();
}
}
}
/// IvarBacksCurrentMethodAccessor - This routine returns 'true' if 'IV' is
/// an ivar synthesized for 'Method' and 'Method' is a property accessor
/// declared in class 'IFace'.
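/// For example (sketch): with "@property int count;" synthesized as
/// "@synthesize count = _count;", calling this with the getter "-count" and
/// the ivar "_count" returns true, since "_count" backs that accessor.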
bool
Sema::IvarBacksCurrentMethodAccessor(ObjCInterfaceDecl *IFace,
ObjCMethodDecl *Method, ObjCIvarDecl *IV) {
if (!IV->getSynthesize())
return false;
ObjCMethodDecl *IMD = IFace->lookupMethod(Method->getSelector(),
Method->isInstanceMethod());
if (!IMD || !IMD->isPropertyAccessor())
return false;
// Look up a property declaration, one of whose accessors is implemented
// by this method.
for (const auto *Property : IFace->instance_properties()) {
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
return true;
}
// Also look up a property declaration in a class extension, one of whose
// accessors is implemented by this method.
for (const auto *Ext : IFace->known_extensions())
for (const auto *Property : Ext->instance_properties())
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
return true;
return false;
}
static bool SuperClassImplementsProperty(ObjCInterfaceDecl *IDecl,
ObjCPropertyDecl *Prop) {
bool SuperClassImplementsGetter = false;
bool SuperClassImplementsSetter = false;
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readonly)
SuperClassImplementsSetter = true;
while (IDecl->getSuperClass()) {
ObjCInterfaceDecl *SDecl = IDecl->getSuperClass();
if (!SuperClassImplementsGetter && SDecl->getInstanceMethod(Prop->getGetterName()))
SuperClassImplementsGetter = true;
if (!SuperClassImplementsSetter && SDecl->getInstanceMethod(Prop->getSetterName()))
SuperClassImplementsSetter = true;
if (SuperClassImplementsGetter && SuperClassImplementsSetter)
return true;
IDecl = IDecl->getSuperClass();
}
return false;
}
/// \brief Default-synthesizes all properties which must be synthesized
/// in the class's \@implementation.
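/// For example (sketch): under default synthesis, "@property int count;"
/// with no matching @synthesize/@dynamic in the @implementation behaves as
/// if "@synthesize count = _count;" had been written.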
void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl *IMPDecl,
ObjCInterfaceDecl *IDecl,
SourceLocation AtEnd) {
ObjCInterfaceDecl::PropertyMap PropMap;
ObjCInterfaceDecl::PropertyDeclOrder PropertyOrder;
IDecl->collectPropertiesToImplement(PropMap, PropertyOrder);
if (PropMap.empty())
return;
ObjCInterfaceDecl::PropertyMap SuperPropMap;
CollectSuperClassPropertyImplementations(IDecl, SuperPropMap);
for (unsigned i = 0, e = PropertyOrder.size(); i != e; i++) {
ObjCPropertyDecl *Prop = PropertyOrder[i];
// Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
Prop->isClassProperty() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional)
continue;
// Property may have been synthesized by user.
if (IMPDecl->FindPropertyImplDecl(
Prop->getIdentifier(), Prop->getQueryKind()))
continue;
if (IMPDecl->getInstanceMethod(Prop->getGetterName())) {
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readonly)
continue;
if (IMPDecl->getInstanceMethod(Prop->getSetterName()))
continue;
}
if (ObjCPropertyImplDecl *PID =
IMPDecl->FindPropertyImplIvarDecl(Prop->getIdentifier())) {
Diag(Prop->getLocation(), diag::warn_no_autosynthesis_shared_ivar_property)
<< Prop->getIdentifier();
if (PID->getLocation().isValid())
Diag(PID->getLocation(), diag::note_property_synthesize);
continue;
}
ObjCPropertyDecl *PropInSuperClass =
SuperPropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
if (ObjCProtocolDecl *Proto =
dyn_cast<ObjCProtocolDecl>(Prop->getDeclContext())) {
// We won't auto-synthesize properties declared in protocols.
// Suppress the warning if the class's superclass implements the property's
// getter and, for readwrite properties, its setter, or if the property is
// going to be implemented in its super class.
if (!SuperClassImplementsProperty(IDecl, Prop) && !PropInSuperClass) {
Diag(IMPDecl->getLocation(),
diag::warn_auto_synthesizing_protocol_property)
<< Prop << Proto;
Diag(Prop->getLocation(), diag::note_property_declare);
std::string FixIt =
(Twine("@synthesize ") + Prop->getName() + ";\n\n").str();
Diag(AtEnd, diag::note_add_synthesize_directive)
<< FixItHint::CreateInsertion(AtEnd, FixIt);
}
continue;
}
// If the property is to be implemented in the super class, ignore it.
if (PropInSuperClass) {
if ((Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readwrite) &&
(PropInSuperClass->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_readonly) &&
!IMPDecl->getInstanceMethod(Prop->getSetterName()) &&
!IDecl->HasUserDeclaredSetterMethod(Prop)) {
Diag(Prop->getLocation(), diag::warn_no_autosynthesis_property)
<< Prop->getIdentifier();
Diag(PropInSuperClass->getLocation(), diag::note_property_declare);
}
else {
Diag(Prop->getLocation(), diag::warn_autosynthesis_property_in_superclass)
<< Prop->getIdentifier();
Diag(PropInSuperClass->getLocation(), diag::note_property_declare);
Diag(IMPDecl->getLocation(), diag::note_while_in_implementation);
}
continue;
}
// We use invalid SourceLocations for the synthesized ivars since they
// aren't really synthesized at a particular location; they just exist.
// Saying that they are located at the @implementation isn't really going
// to help users.
ObjCPropertyImplDecl *PIDecl = dyn_cast_or_null<ObjCPropertyImplDecl>(
ActOnPropertyImplDecl(S, SourceLocation(), SourceLocation(),
true,
/* property = */ Prop->getIdentifier(),
/* ivar = */ Prop->getDefaultSynthIvarName(Context),
Prop->getLocation(), Prop->getQueryKind()));
if (PIDecl) {
Diag(Prop->getLocation(), diag::warn_missing_explicit_synthesis);
Diag(IMPDecl->getLocation(), diag::note_while_in_implementation);
}
}
}
void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D,
SourceLocation AtEnd) {
if (!LangOpts.ObjCDefaultSynthProperties || LangOpts.ObjCRuntime.isFragile())
return;
ObjCImplementationDecl *IC = dyn_cast_or_null<ObjCImplementationDecl>(D);
if (!IC)
return;
if (ObjCInterfaceDecl* IDecl = IC->getClassInterface())
if (!IDecl->isObjCRequiresPropertyDefs())
DefaultSynthesizeProperties(S, IC, IDecl, AtEnd);
}
static void DiagnoseUnimplementedAccessor(
Sema &S, ObjCInterfaceDecl *PrimaryClass, Selector Method,
ObjCImplDecl *IMPDecl, ObjCContainerDecl *CDecl, ObjCCategoryDecl *C,
ObjCPropertyDecl *Prop,
llvm::SmallPtrSet<const ObjCMethodDecl *, 8> &SMap) {
// Check to see if we have a corresponding selector in SMap with the
// right method type.
auto I = std::find_if(SMap.begin(), SMap.end(),
[&](const ObjCMethodDecl *x) {
return x->getSelector() == Method &&
x->isClassMethod() == Prop->isClassProperty();
});
// When reporting a missing property setter/getter implementation in
// a category, do not report it when the accessor is declared in the
// primary class, the class's protocol, or one of its super classes,
// because the class is going to implement it.
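// For example (sketch): if a category on a hypothetical class "Widget"
// adopts a protocol declaring "@property int size;", no warning is emitted
// for the missing accessors when "Widget" itself declares "size", since
// "Widget" is expected to implement them.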
if (I == SMap.end() &&
(PrimaryClass == nullptr ||
!PrimaryClass->lookupPropertyAccessor(Method, C,
Prop->isClassProperty()))) {
unsigned diag =
isa<ObjCCategoryDecl>(CDecl)
? (Prop->isClassProperty()
? diag::warn_impl_required_in_category_for_class_property
: diag::warn_setter_getter_impl_required_in_category)
: (Prop->isClassProperty()
? diag::warn_impl_required_for_class_property
: diag::warn_setter_getter_impl_required);
S.Diag(IMPDecl->getLocation(), diag) << Prop->getDeclName() << Method;
S.Diag(Prop->getLocation(), diag::note_property_declare);
if (S.LangOpts.ObjCDefaultSynthProperties &&
S.LangOpts.ObjCRuntime.isNonFragile())
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CDecl))
if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs())
S.Diag(RID->getLocation(), diag::note_suppressed_class_declare);
}
}
void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCContainerDecl *CDecl,
bool SynthesizeProperties) {
ObjCContainerDecl::PropertyMap PropMap;
ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl);
// Since we don't synthesize class properties, we should emit a diagnostic
// even if SynthesizeProperties is true.
ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
// Gather properties which need not be implemented in this class
// or category.
if (!IDecl)
if (ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl)) {
// For categories, no need to implement properties declared in
// its primary class (and its super classes) if property is
// declared in one of those containers.
if ((IDecl = C->getClassInterface())) {
ObjCInterfaceDecl::PropertyDeclOrder PO;
IDecl->collectPropertiesToImplement(NoNeedToImplPropMap, PO);
}
}
if (IDecl)
CollectSuperClassPropertyImplementations(IDecl, NoNeedToImplPropMap);
// When SynthesizeProperties is true, we only check class properties.
CollectImmediateProperties(CDecl, PropMap, NoNeedToImplPropMap,
SynthesizeProperties/*CollectClassPropsOnly*/);
// Scan the @interface to see if any of the protocols it adopts
// require an explicit implementation, via attribute
// 'objc_protocol_requires_explicit_implementation'.
if (IDecl) {
std::unique_ptr<ObjCContainerDecl::PropertyMap> LazyMap;
for (auto *PDecl : IDecl->all_referenced_protocols()) {
if (!PDecl->hasAttr<ObjCExplicitProtocolImplAttr>())
continue;
// Lazily construct a set of all the properties in the @interface
// of the class, without looking at the superclass. We cannot
// use the call to CollectImmediateProperties() above as that
// utilizes information from the super class's properties as well
// as scans the adopted protocols. This work only triggers for protocols
// with the attribute, which is very rare, and only occurs when
// analyzing the @implementation.
if (!LazyMap) {
ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
LazyMap.reset(new ObjCContainerDecl::PropertyMap());
CollectImmediateProperties(CDecl, *LazyMap, NoNeedToImplPropMap,
/* CollectClassPropsOnly */ false,
/* IncludeProtocols */ false);
}
// Add the properties of 'PDecl' to the list of properties that
// need to be implemented.
for (auto *PropDecl : PDecl->properties()) {
if ((*LazyMap)[std::make_pair(PropDecl->getIdentifier(),
PropDecl->isClassProperty())])
continue;
PropMap[std::make_pair(PropDecl->getIdentifier(),
PropDecl->isClassProperty())] = PropDecl;
}
}
}
if (PropMap.empty())
return;
llvm::DenseSet<ObjCPropertyDecl *> PropImplMap;
for (const auto *I : IMPDecl->property_impls())
PropImplMap.insert(I->getPropertyDecl());
llvm::SmallPtrSet<const ObjCMethodDecl *, 8> InsMap;
// Collect property accessors implemented in current implementation.
for (const auto *I : IMPDecl->methods())
InsMap.insert(I);
ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl);
ObjCInterfaceDecl *PrimaryClass = nullptr;
if (C && !C->IsClassExtension())
if ((PrimaryClass = C->getClassInterface()))
// Report unimplemented properties in the category as well.
if (ObjCImplDecl *IMP = PrimaryClass->getImplementation()) {
// When reporting on missing setters/getters, do not report when the
// setter/getter is implemented in the category's primary class
// implementation.
for (const auto *I : IMP->methods())
InsMap.insert(I);
}
for (ObjCContainerDecl::PropertyMap::iterator
P = PropMap.begin(), E = PropMap.end(); P != E; ++P) {
ObjCPropertyDecl *Prop = P->second;
// Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional ||
PropImplMap.count(Prop) ||
Prop->getAvailability() == AR_Unavailable)
continue;
// Diagnose unimplemented getters and setters.
DiagnoseUnimplementedAccessor(*this,
PrimaryClass, Prop->getGetterName(), IMPDecl, CDecl, C, Prop, InsMap);
if (!Prop->isReadOnly())
DiagnoseUnimplementedAccessor(*this,
PrimaryClass, Prop->getSetterName(),
IMPDecl, CDecl, C, Prop, InsMap);
}
}
void Sema::diagnoseNullResettableSynthesizedSetters(const ObjCImplDecl *impDecl) {
for (const auto *propertyImpl : impDecl->property_impls()) {
const auto *property = propertyImpl->getPropertyDecl();
// Warn about null_resettable properties with synthesized setters,
// because the setter won't properly handle nil.
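// For example (sketch): "@property (null_resettable, copy) NSString *name;"
// with a plain "@synthesize name;" and no custom setter or getter in the
// @implementation triggers this warning, since assigning nil to a
// null_resettable property is supposed to reset it to a default value.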
if (propertyImpl->getPropertyImplementation()
== ObjCPropertyImplDecl::Synthesize &&
(property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) &&
property->getGetterMethodDecl() &&
property->getSetterMethodDecl()) {
auto *getterMethod = property->getGetterMethodDecl();
auto *setterMethod = property->getSetterMethodDecl();
if (!impDecl->getInstanceMethod(setterMethod->getSelector()) &&
!impDecl->getInstanceMethod(getterMethod->getSelector())) {
SourceLocation loc = propertyImpl->getLocation();
if (loc.isInvalid())
loc = impDecl->getLocStart();
Diag(loc, diag::warn_null_resettable_setter)
<< setterMethod->getSelector() << property->getDeclName();
}
}
}
}
void
Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
ObjCInterfaceDecl* IDecl) {
// Rules apply in non-GC mode only
if (getLangOpts().getGC() != LangOptions::NonGC)
return;
ObjCContainerDecl::PropertyMap PM;
for (auto *Prop : IDecl->properties())
PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
for (const auto *Ext : IDecl->known_extensions())
for (auto *Prop : Ext->properties())
PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
for (ObjCContainerDecl::PropertyMap::iterator I = PM.begin(), E = PM.end();
I != E; ++I) {
const ObjCPropertyDecl *Property = I->second;
ObjCMethodDecl *GetterMethod = nullptr;
ObjCMethodDecl *SetterMethod = nullptr;
bool LookedUpGetterSetter = false;
unsigned Attributes = Property->getPropertyAttributes();
unsigned AttributesAsWritten = Property->getPropertyAttributesAsWritten();
if (!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_atomic) &&
!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_nonatomic)) {
GetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getGetterName()) :
IMPDecl->getInstanceMethod(Property->getGetterName());
SetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getSetterName()) :
IMPDecl->getInstanceMethod(Property->getSetterName());
LookedUpGetterSetter = true;
if (GetterMethod) {
Diag(GetterMethod->getLocation(),
diag::warn_default_atomic_custom_getter_setter)
<< Property->getIdentifier() << 0;
Diag(Property->getLocation(), diag::note_property_declare);
}
if (SetterMethod) {
Diag(SetterMethod->getLocation(),
diag::warn_default_atomic_custom_getter_setter)
<< Property->getIdentifier() << 1;
Diag(Property->getLocation(), diag::note_property_declare);
}
}
// We only care about readwrite atomic properties.
if ((Attributes & ObjCPropertyDecl::OBJC_PR_nonatomic) ||
!(Attributes & ObjCPropertyDecl::OBJC_PR_readwrite))
continue;
if (const ObjCPropertyImplDecl *PIDecl = IMPDecl->FindPropertyImplDecl(
Property->getIdentifier(), Property->getQueryKind())) {
if (PIDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
continue;
if (!LookedUpGetterSetter) {
GetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getGetterName()) :
IMPDecl->getInstanceMethod(Property->getGetterName());
SetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getSetterName()) :
IMPDecl->getInstanceMethod(Property->getSetterName());
}
if ((GetterMethod && !SetterMethod) || (!GetterMethod && SetterMethod)) {
SourceLocation MethodLoc =
(GetterMethod ? GetterMethod->getLocation()
: SetterMethod->getLocation());
Diag(MethodLoc, diag::warn_atomic_property_rule)
<< Property->getIdentifier() << (GetterMethod != nullptr)
<< (SetterMethod != nullptr);
// Emit a fixit suggesting 'nonatomic'.
if (Property->getLParenLoc().isValid() &&
!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_atomic)) {
// @property () ... case.
SourceLocation AfterLParen =
getLocForEndOfToken(Property->getLParenLoc());
StringRef NonatomicStr = AttributesAsWritten ? "nonatomic, "
: "nonatomic";
Diag(Property->getLocation(),
diag::note_atomic_property_fixup_suggest)
<< FixItHint::CreateInsertion(AfterLParen, NonatomicStr);
} else if (Property->getLParenLoc().isInvalid()) {
// The '@property id ...' (no attribute list) case.
SourceLocation startLoc =
Property->getTypeSourceInfo()->getTypeLoc().getBeginLoc();
Diag(Property->getLocation(),
diag::note_atomic_property_fixup_suggest)
<< FixItHint::CreateInsertion(startLoc, "(nonatomic) ");
}
else
Diag(MethodLoc, diag::note_atomic_property_fixup_suggest);
Diag(Property->getLocation(), diag::note_property_declare);
}
}
}
}
void Sema::DiagnoseOwningPropertyGetterSynthesis(const ObjCImplementationDecl *D) {
if (getLangOpts().getGC() == LangOptions::GCOnly)
return;
for (const auto *PID : D->property_impls()) {
const ObjCPropertyDecl *PD = PID->getPropertyDecl();
if (PD && !PD->hasAttr<NSReturnsNotRetainedAttr>() &&
!PD->isClassProperty() &&
!D->getInstanceMethod(PD->getGetterName())) {
ObjCMethodDecl *method = PD->getGetterMethodDecl();
if (!method)
continue;
ObjCMethodFamily family = method->getMethodFamily();
if (family == OMF_alloc || family == OMF_copy ||
family == OMF_mutableCopy || family == OMF_new) {
if (getLangOpts().ObjCAutoRefCount)
Diag(PD->getLocation(), diag::err_cocoa_naming_owned_rule);
else
Diag(PD->getLocation(), diag::warn_cocoa_naming_owned_rule);
// Look for a getter explicitly declared alongside the property.
// If we find one, use its location for the note.
SourceLocation noteLoc = PD->getLocation();
SourceLocation fixItLoc;
for (auto *getterRedecl : method->redecls()) {
if (getterRedecl->isImplicit())
continue;
if (getterRedecl->getDeclContext() != PD->getDeclContext())
continue;
noteLoc = getterRedecl->getLocation();
fixItLoc = getterRedecl->getLocEnd();
}
Preprocessor &PP = getPreprocessor();
TokenValue tokens[] = {
tok::kw___attribute, tok::l_paren, tok::l_paren,
PP.getIdentifierInfo("objc_method_family"), tok::l_paren,
PP.getIdentifierInfo("none"), tok::r_paren,
tok::r_paren, tok::r_paren
};
StringRef spelling = "__attribute__((objc_method_family(none)))";
StringRef macroName = PP.getLastMacroWithSpelling(noteLoc, tokens);
if (!macroName.empty())
spelling = macroName;
auto noteDiag = Diag(noteLoc, diag::note_cocoa_naming_declare_family)
<< method->getDeclName() << spelling;
if (fixItLoc.isValid()) {
SmallString<64> fixItText(" ");
fixItText += spelling;
noteDiag << FixItHint::CreateInsertion(fixItLoc, fixItText);
}
}
}
}
}
void Sema::DiagnoseMissingDesignatedInitOverrides(
const ObjCImplementationDecl *ImplD,
const ObjCInterfaceDecl *IFD) {
assert(IFD->hasDesignatedInitializers());
const ObjCInterfaceDecl *SuperD = IFD->getSuperClass();
if (!SuperD)
return;
SelectorSet InitSelSet;
for (const auto *I : ImplD->instance_methods())
if (I->getMethodFamily() == OMF_init)
InitSelSet.insert(I->getSelector());
SmallVector<const ObjCMethodDecl *, 8> DesignatedInits;
SuperD->getDesignatedInitializers(DesignatedInits);
for (SmallVector<const ObjCMethodDecl *, 8>::iterator
I = DesignatedInits.begin(), E = DesignatedInits.end(); I != E; ++I) {
const ObjCMethodDecl *MD = *I;
if (!InitSelSet.count(MD->getSelector())) {
bool Ignore = false;
if (auto *IMD = IFD->getInstanceMethod(MD->getSelector())) {
Ignore = IMD->isUnavailable();
}
if (!Ignore) {
Diag(ImplD->getLocation(),
diag::warn_objc_implementation_missing_designated_init_override)
<< MD->getSelector();
Diag(MD->getLocation(), diag::note_objc_designated_init_marked_here);
}
}
}
}
/// AddPropertyAttrs - Propagates attributes from a property to the
/// implicitly-declared getter or setter for that property.
static void AddPropertyAttrs(Sema &S, ObjCMethodDecl *PropertyMethod,
ObjCPropertyDecl *Property) {
// Should we just clone all attributes over?
for (const auto *A : Property->attrs()) {
if (isa<DeprecatedAttr>(A) ||
isa<UnavailableAttr>(A) ||
isa<AvailabilityAttr>(A))
PropertyMethod->addAttr(A->clone(S.Context));
}
}
/// ProcessPropertyDecl - Make sure that any user-defined setter/getter methods
/// have the property type and issue diagnostics if they don't.
/// Also synthesize a getter/setter method if none exist (and update the
/// appropriate lookup tables).
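/// For example (sketch): "@property int count;" implicitly declares
/// "- (int)count;" and "- (void)setCount:(int)count;" here unless the user
/// already declared methods with those selectors.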
void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
ObjCMethodDecl *GetterMethod, *SetterMethod;
ObjCContainerDecl *CD = cast<ObjCContainerDecl>(property->getDeclContext());
if (CD->isInvalidDecl())
return;
bool IsClassProperty = property->isClassProperty();
GetterMethod = IsClassProperty ?
CD->getClassMethod(property->getGetterName()) :
CD->getInstanceMethod(property->getGetterName());
// If the setter or getter is not found in the class extension, it might
// be in the primary class.
if (!GetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
GetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
getClassMethod(property->getGetterName()) :
CatDecl->getClassInterface()->
getInstanceMethod(property->getGetterName());
SetterMethod = IsClassProperty ?
CD->getClassMethod(property->getSetterName()) :
CD->getInstanceMethod(property->getSetterName());
if (!SetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
SetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
getClassMethod(property->getSetterName()) :
CatDecl->getClassInterface()->
getInstanceMethod(property->getSetterName());
DiagnosePropertyAccessorMismatch(property, GetterMethod,
property->getLocation());
if (!property->isReadOnly() && SetterMethod) {
if (Context.getCanonicalType(SetterMethod->getReturnType()) !=
Context.VoidTy)
Diag(SetterMethod->getLocation(), diag::err_setter_type_void);
if (SetterMethod->param_size() != 1 ||
!Context.hasSameUnqualifiedType(
(*SetterMethod->param_begin())->getType().getNonReferenceType(),
property->getType().getNonReferenceType())) {
Diag(property->getLocation(),
diag::warn_accessor_property_type_mismatch)
<< property->getDeclName()
<< SetterMethod->getSelector();
Diag(SetterMethod->getLocation(), diag::note_declared_at);
}
}
// Synthesize getter/setter methods if none exist.
// Find the default getter and, if none is found, add one.
// FIXME: The synthesized property we set here is misleading. We almost always
// synthesize these methods unless the user explicitly provided prototypes
// (which is odd, but allowed). Sema should be typechecking that the
// declarations agree in that situation (which it currently does not).
if (!GetterMethod) {
// No instance/class method with the same name as the property getter was
// found. Declare a getter method and add it to the list of methods for
// this class.
SourceLocation Loc = property->getLocation();
// The getter returns the declared property type with all qualifiers
// removed.
QualType resultTy = property->getType().getAtomicUnqualifiedType();
// If the property is null_resettable, the getter returns nonnull.
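// For example (sketch): for "@property (null_resettable) NSString *name;"
// the synthesized getter's return type becomes "NSString * _Nonnull".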
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = resultTy;
if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)) {
if (*nullability == NullabilityKind::Unspecified)
resultTy = Context.getAttributedType(AttributedType::attr_nonnull,
modifiedTy, modifiedTy);
}
}
GetterMethod = ObjCMethodDecl::Create(Context, Loc, Loc,
property->getGetterName(),
resultTy, nullptr, CD,
!IsClassProperty, /*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true, /*isDefined=*/false,
(property->getPropertyImplementation() ==
ObjCPropertyDecl::Optional) ?
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
CD->addDecl(GetterMethod);
AddPropertyAttrs(*this, GetterMethod, property);
if (property->hasAttr<NSReturnsNotRetainedAttr>())
GetterMethod->addAttr(NSReturnsNotRetainedAttr::CreateImplicit(Context,
Loc));
if (property->hasAttr<ObjCReturnsInnerPointerAttr>())
GetterMethod->addAttr(
ObjCReturnsInnerPointerAttr::CreateImplicit(Context, Loc));
if (const SectionAttr *SA = property->getAttr<SectionAttr>())
GetterMethod->addAttr(
SectionAttr::CreateImplicit(Context, SectionAttr::GNU_section,
SA->getName(), Loc));
if (getLangOpts().ObjCAutoRefCount)
CheckARCMethodDecl(GetterMethod);
} else
// A user-declared getter will be marked synthesized when @synthesize of
// the property with the same name is seen in the @implementation.
GetterMethod->setPropertyAccessor(true);
property->setGetterMethodDecl(GetterMethod);
// Skip setter if property is read-only.
if (!property->isReadOnly()) {
// Find the default setter and, if none is found, add one.
if (!SetterMethod) {
// No instance/class method with the same name as the property setter was
// found. Declare a setter method and add it to the list of methods for
// this class.
SourceLocation Loc = property->getLocation();
SetterMethod =
ObjCMethodDecl::Create(Context, Loc, Loc,
property->getSetterName(), Context.VoidTy,
nullptr, CD, !IsClassProperty,
/*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true,
/*isDefined=*/false,
(property->getPropertyImplementation() ==
ObjCPropertyDecl::Optional) ?
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
// Remove all qualifiers from the setter's parameter type.
QualType paramTy =
property->getType().getUnqualifiedType().getAtomicUnqualifiedType();
// If the property is null_resettable, the setter accepts a
// nullable value.
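// For example (sketch): for "@property (null_resettable) NSString *name;"
// the synthesized setter takes an "NSString * _Nullable" parameter.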
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = paramTy;
if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)){
if (*nullability == NullabilityKind::Unspecified)
paramTy = Context.getAttributedType(AttributedType::attr_nullable,
modifiedTy, modifiedTy);
}
}
// Invent the arguments for the setter. We don't bother making a
// nice name for the argument.
ParmVarDecl *Argument = ParmVarDecl::Create(Context, SetterMethod,
Loc, Loc,
property->getIdentifier(),
paramTy,
/*TInfo=*/nullptr,
SC_None,
nullptr);
SetterMethod->setMethodParams(Context, Argument, None);
AddPropertyAttrs(*this, SetterMethod, property);
CD->addDecl(SetterMethod);
if (const SectionAttr *SA = property->getAttr<SectionAttr>())
SetterMethod->addAttr(
SectionAttr::CreateImplicit(Context, SectionAttr::GNU_section,
SA->getName(), Loc));
// It's possible for the user to have set a very odd custom
// setter selector that causes it to have a method family.
if (getLangOpts().ObjCAutoRefCount)
CheckARCMethodDecl(SetterMethod);
} else
// A user-declared setter will be marked synthesized when @synthesize of
// the property with the same name is seen in the @implementation.
SetterMethod->setPropertyAccessor(true);
property->setSetterMethodDecl(SetterMethod);
}
// Add any synthesized methods to the global pool. This allows us to
// handle the following, which is supported by GCC (and part of the design).
//
// @interface Foo
// @property double bar;
// @end
//
// void thisIsUnfortunate() {
// id foo;
// double bar = [foo bar];
// }
//
if (!IsClassProperty) {
if (GetterMethod)
AddInstanceMethodToGlobalPool(GetterMethod);
if (SetterMethod)
AddInstanceMethodToGlobalPool(SetterMethod);
} else {
if (GetterMethod)
AddFactoryMethodToGlobalPool(GetterMethod);
if (SetterMethod)
AddFactoryMethodToGlobalPool(SetterMethod);
}
ObjCInterfaceDecl *CurrentClass = dyn_cast<ObjCInterfaceDecl>(CD);
if (!CurrentClass) {
if (ObjCCategoryDecl *Cat = dyn_cast<ObjCCategoryDecl>(CD))
CurrentClass = Cat->getClassInterface();
else if (ObjCImplDecl *Impl = dyn_cast<ObjCImplDecl>(CD))
CurrentClass = Impl->getClassInterface();
}
if (GetterMethod)
CheckObjCMethodOverrides(GetterMethod, CurrentClass, Sema::RTC_Unknown);
if (SetterMethod)
CheckObjCMethodOverrides(SetterMethod, CurrentClass, Sema::RTC_Unknown);
}
void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
SourceLocation Loc,
unsigned &Attributes,
bool propertyInPrimaryClass) {
// FIXME: Improve the reported location.
if (!PDecl || PDecl->isInvalidDecl())
return;
if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
(Attributes & ObjCDeclSpec::DQ_PR_readwrite))
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "readonly" << "readwrite";
ObjCPropertyDecl *PropertyDecl = cast<ObjCPropertyDecl>(PDecl);
QualType PropertyTy = PropertyDecl->getType();
// Check for copy or retain on non-object types.
if ((Attributes & (ObjCDeclSpec::DQ_PR_weak | ObjCDeclSpec::DQ_PR_copy |
ObjCDeclSpec::DQ_PR_retain | ObjCDeclSpec::DQ_PR_strong)) &&
!PropertyTy->isObjCRetainableType() &&
!PropertyDecl->hasAttr<ObjCNSObjectAttr>()) {
Diag(Loc, diag::err_objc_property_requires_object)
<< (Attributes & ObjCDeclSpec::DQ_PR_weak ? "weak" :
Attributes & ObjCDeclSpec::DQ_PR_copy ? "copy" : "retain (or strong)");
Attributes &= ~(ObjCDeclSpec::DQ_PR_weak | ObjCDeclSpec::DQ_PR_copy |
ObjCDeclSpec::DQ_PR_retain | ObjCDeclSpec::DQ_PR_strong);
PropertyDecl->setInvalidDecl();
}
// Check for more than one of { assign, copy, retain }.
if (Attributes & ObjCDeclSpec::DQ_PR_assign) {
if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "copy";
Attributes &= ~ObjCDeclSpec::DQ_PR_copy;
}
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (getLangOpts().ObjCAutoRefCount &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
if (PropertyDecl->hasAttr<IBOutletCollectionAttr>())
Diag(Loc, diag::warn_iboutletcollection_property_assign);
} else if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained) {
if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "copy";
Attributes &= ~ObjCDeclSpec::DQ_PR_copy;
}
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (getLangOpts().ObjCAutoRefCount &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
} else if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (Attributes & ObjCDeclSpec::DQ_PR_weak) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
}
else if ((Attributes & ObjCDeclSpec::DQ_PR_retain) &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "retain" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
else if ((Attributes & ObjCDeclSpec::DQ_PR_strong) &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "strong" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
if (Attributes & ObjCDeclSpec::DQ_PR_weak) {
// 'weak' and 'nonnull' are mutually exclusive.
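// For example (sketch): "@property (weak, nonnull) id delegate;" is
// rejected here with err_objc_property_attr_mutually_exclusive.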
if (auto nullability = PropertyTy->getNullability(Context)) {
if (*nullability == NullabilityKind::NonNull)
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "nonnull" << "weak";
}
}
if ((Attributes & ObjCDeclSpec::DQ_PR_atomic) &&
(Attributes & ObjCDeclSpec::DQ_PR_nonatomic)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "atomic" << "nonatomic";
Attributes &= ~ObjCDeclSpec::DQ_PR_atomic;
}
// Warn if the user supplied no assignment attribute, the property is
// readwrite, and this is an object type.
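// For example (sketch): in manual retain/release mode, a bare
// "@property NSString *name;" in the primary class warns that no ownership
// attribute ('assign', 'retain', or 'copy') was specified and that a plain
// assignment is likely inappropriate for an object type.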
if (!getOwnershipRule(Attributes) && PropertyTy->isObjCRetainableType()) {
if (Attributes & ObjCDeclSpec::DQ_PR_readonly) {
// do nothing
} else if (getLangOpts().ObjCAutoRefCount) {
// With arc, @property definitions should default to strong when
// not specified.
PropertyDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
} else if (PropertyTy->isObjCObjectPointerType()) {
bool isAnyClassTy =
(PropertyTy->isObjCClassType() ||
PropertyTy->isObjCQualifiedClassType());
// In non-GC, non-ARC mode, 'Class' is treated as a 'void *'; no need to
// issue any warning.
if (isAnyClassTy && getLangOpts().getGC() == LangOptions::NonGC)
;
else if (propertyInPrimaryClass) {
// Don't issue a warning on a property with no lifetime in a class
// extension, as it is inherited from the property in the primary class.
// Skip this warning in GC-only mode.
if (getLangOpts().getGC() != LangOptions::GCOnly)
Diag(Loc, diag::warn_objc_property_no_assignment_attribute);
// In non-GC code, warn that this is likely inappropriate.
if (getLangOpts().getGC() == LangOptions::NonGC)
Diag(Loc, diag::warn_objc_property_default_assign_on_object);
}
}
// FIXME: Implement warning dependent on NSCopying being
// implemented. See also:
// <rdar://5168496&4855821&5607453&5096644&4947311&5698469&4947014&5168496>
// (please trim this list while you are at it).
}
if (!(Attributes & ObjCDeclSpec::DQ_PR_copy)
&&!(Attributes & ObjCDeclSpec::DQ_PR_readonly)
&& getLangOpts().getGC() == LangOptions::GCOnly
&& PropertyTy->isBlockPointerType())
Diag(Loc, diag::warn_objc_property_copy_missing_on_block);
else if ((Attributes & ObjCDeclSpec::DQ_PR_retain) &&
!(Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
!(Attributes & ObjCDeclSpec::DQ_PR_strong) &&
PropertyTy->isBlockPointerType())
Diag(Loc, diag::warn_objc_property_retain_of_block);
if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
(Attributes & ObjCDeclSpec::DQ_PR_setter))
Diag(Loc, diag::warn_objc_readonly_property_has_setter);
}
Index: head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp (revision 322855)
@@ -1,4196 +1,4202 @@
//===--- ASTReaderDecl.cpp - Decl Deserialization ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTReader::ReadDeclRecord method, which is the
// entrypoint for loading a decl.
//
//===----------------------------------------------------------------------===//
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclGroup.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace clang::serialization;
//===----------------------------------------------------------------------===//
// Declaration deserialization
//===----------------------------------------------------------------------===//
namespace clang {
class ASTDeclReader : public DeclVisitor<ASTDeclReader, void> {
ASTReader &Reader;
ASTRecordReader &Record;
ASTReader::RecordLocation Loc;
const DeclID ThisDeclID;
const SourceLocation ThisDeclLoc;
typedef ASTReader::RecordData RecordData;
TypeID TypeIDForTypeDecl;
unsigned AnonymousDeclNumber;
GlobalDeclID NamedDeclForTagDecl;
IdentifierInfo *TypedefNameForLinkage;
bool HasPendingBody;
///\brief A flag recording that the entity of this decl is used. We use it
/// to delay the marking of the canonical decl as used until the entire
/// declaration is deserialized and merged.
bool IsDeclMarkedUsed;
uint64_t GetCurrentCursorOffset();
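// Offsets are serialized relative to the current record so that they stay
// small, with 0 reserved to mean "no offset". A sketch of the decoding
// done below: a stored LocalOffset L maps to the absolute offset
// (Loc.Offset - L), or to 0 when L is 0.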
uint64_t ReadLocalOffset() {
uint64_t LocalOffset = Record.readInt();
assert(LocalOffset < Loc.Offset && "offset points after current record");
return LocalOffset ? Loc.Offset - LocalOffset : 0;
}
uint64_t ReadGlobalOffset() {
uint64_t Local = ReadLocalOffset();
return Local ? Record.getGlobalBitOffset(Local) : 0;
}
SourceLocation ReadSourceLocation() {
return Record.readSourceLocation();
}
SourceRange ReadSourceRange() {
return Record.readSourceRange();
}
TypeSourceInfo *GetTypeSourceInfo() {
return Record.getTypeSourceInfo();
}
serialization::DeclID ReadDeclID() {
return Record.readDeclID();
}
std::string ReadString() {
return Record.readString();
}
void ReadDeclIDList(SmallVectorImpl<DeclID> &IDs) {
for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I)
IDs.push_back(ReadDeclID());
}
Decl *ReadDecl() {
return Record.readDecl();
}
template<typename T>
T *ReadDeclAs() {
return Record.readDeclAs<T>();
}
void ReadQualifierInfo(QualifierInfo &Info) {
Record.readQualifierInfo(Info);
}
void ReadDeclarationNameLoc(DeclarationNameLoc &DNLoc, DeclarationName Name) {
Record.readDeclarationNameLoc(DNLoc, Name);
}
serialization::SubmoduleID readSubmoduleID() {
if (Record.getIdx() == Record.size())
return 0;
return Record.getGlobalSubmoduleID(Record.readInt());
}
Module *readModule() {
return Record.getSubmodule(readSubmoduleID());
}
void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update);
void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data,
const CXXRecordDecl *D);
void MergeDefinitionData(CXXRecordDecl *D,
struct CXXRecordDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD);
static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index);
static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC,
unsigned Index, NamedDecl *D);
/// Results from loading a RedeclarableDecl.
class RedeclarableResult {
Decl *MergeWith;
GlobalDeclID FirstID;
bool IsKeyDecl;
public:
RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl)
: MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {}
/// \brief Retrieve the first ID.
GlobalDeclID getFirstID() const { return FirstID; }
/// \brief Is this declaration a key declaration?
bool isKeyDecl() const { return IsKeyDecl; }
/// \brief Get a known declaration that this should be merged with, if
/// any.
Decl *getKnownMergeTarget() const { return MergeWith; }
};
/// \brief Class used to capture the result of searching for an existing
/// declaration of a specific kind and name, along with the ability
/// to update the place where this result was found (the declaration
/// chain hanging off an identifier or the DeclContext we searched in)
/// if requested.
class FindExistingResult {
ASTReader &Reader;
NamedDecl *New;
NamedDecl *Existing;
bool AddResult;
unsigned AnonymousDeclNumber;
IdentifierInfo *TypedefNameForLinkage;
void operator=(FindExistingResult &&) = delete;
public:
FindExistingResult(ASTReader &Reader)
: Reader(Reader), New(nullptr), Existing(nullptr), AddResult(false),
AnonymousDeclNumber(0), TypedefNameForLinkage(nullptr) {}
FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing,
unsigned AnonymousDeclNumber,
IdentifierInfo *TypedefNameForLinkage)
: Reader(Reader), New(New), Existing(Existing), AddResult(true),
AnonymousDeclNumber(AnonymousDeclNumber),
TypedefNameForLinkage(TypedefNameForLinkage) {}
FindExistingResult(FindExistingResult &&Other)
: Reader(Other.Reader), New(Other.New), Existing(Other.Existing),
AddResult(Other.AddResult),
AnonymousDeclNumber(Other.AnonymousDeclNumber),
TypedefNameForLinkage(Other.TypedefNameForLinkage) {
Other.AddResult = false;
}
~FindExistingResult();
/// \brief Suppress the addition of this result into the known set of
/// names.
void suppress() { AddResult = false; }
operator NamedDecl*() const { return Existing; }
template<typename T>
operator T*() const { return dyn_cast_or_null<T>(Existing); }
};
static DeclContext *getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC);
FindExistingResult findExisting(NamedDecl *D);
public:
ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record,
ASTReader::RecordLocation Loc,
DeclID thisDeclID, SourceLocation ThisDeclLoc)
: Reader(Reader), Record(Record), Loc(Loc),
ThisDeclID(thisDeclID), ThisDeclLoc(ThisDeclLoc),
TypeIDForTypeDecl(0), NamedDeclForTagDecl(0),
TypedefNameForLinkage(nullptr), HasPendingBody(false),
IsDeclMarkedUsed(false) {}
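// LazySpecializations is stored as a DeclID array whose element 0 holds the
// element count, followed by a sorted, uniqued list of specialization IDs;
// AddLazySpecializations below merges new IDs into that encoding.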
template <typename T> static
void AddLazySpecializations(T *D,
SmallVectorImpl<serialization::DeclID>& IDs) {
if (IDs.empty())
return;
// FIXME: We should avoid this pattern of getting the ASTContext.
ASTContext &C = D->getASTContext();
auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations;
if (auto &Old = LazySpecializations) {
IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]);
std::sort(IDs.begin(), IDs.end());
IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
}
auto *Result = new (C) serialization::DeclID[1 + IDs.size()];
*Result = IDs.size();
std::copy(IDs.begin(), IDs.end(), Result + 1);
LazySpecializations = Result;
}
template <typename DeclT>
static Decl *getMostRecentDeclImpl(Redeclarable<DeclT> *D);
static Decl *getMostRecentDeclImpl(...);
static Decl *getMostRecentDecl(Decl *D);
template <typename DeclT>
static void attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D, Decl *Previous,
Decl *Canon);
static void attachPreviousDeclImpl(ASTReader &Reader, ...);
static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous,
Decl *Canon);
template <typename DeclT>
static void attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest);
static void attachLatestDeclImpl(...);
static void attachLatestDecl(Decl *D, Decl *latest);
template <typename DeclT>
static void markIncompleteDeclChainImpl(Redeclarable<DeclT> *D);
static void markIncompleteDeclChainImpl(...);
/// \brief Determine whether this declaration has a pending body.
bool hasPendingBody() const { return HasPendingBody; }
void ReadFunctionDefinition(FunctionDecl *FD);
void Visit(Decl *D);
void UpdateDecl(Decl *D, llvm::SmallVectorImpl<serialization::DeclID>&);
static void setNextObjCCategory(ObjCCategoryDecl *Cat,
ObjCCategoryDecl *Next) {
Cat->NextClassCategory = Next;
}
void VisitDecl(Decl *D);
void VisitPragmaCommentDecl(PragmaCommentDecl *D);
void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D);
void VisitTranslationUnitDecl(TranslationUnitDecl *TU);
void VisitNamedDecl(NamedDecl *ND);
void VisitLabelDecl(LabelDecl *LD);
void VisitNamespaceDecl(NamespaceDecl *D);
void VisitUsingDirectiveDecl(UsingDirectiveDecl *D);
void VisitNamespaceAliasDecl(NamespaceAliasDecl *D);
void VisitTypeDecl(TypeDecl *TD);
RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD);
void VisitTypedefDecl(TypedefDecl *TD);
void VisitTypeAliasDecl(TypeAliasDecl *TD);
void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D);
RedeclarableResult VisitTagDecl(TagDecl *TD);
void VisitEnumDecl(EnumDecl *ED);
RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD);
void VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); }
RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D);
void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); }
RedeclarableResult VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D);
void VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
VisitClassTemplateSpecializationDeclImpl(D);
}
void VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D);
void VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D);
RedeclarableResult
VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D);
void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) {
VisitVarTemplateSpecializationDeclImpl(D);
}
void VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D);
void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D);
void VisitValueDecl(ValueDecl *VD);
void VisitEnumConstantDecl(EnumConstantDecl *ECD);
void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D);
void VisitDeclaratorDecl(DeclaratorDecl *DD);
void VisitFunctionDecl(FunctionDecl *FD);
void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD);
void VisitCXXMethodDecl(CXXMethodDecl *D);
void VisitCXXConstructorDecl(CXXConstructorDecl *D);
void VisitCXXDestructorDecl(CXXDestructorDecl *D);
void VisitCXXConversionDecl(CXXConversionDecl *D);
void VisitFieldDecl(FieldDecl *FD);
void VisitMSPropertyDecl(MSPropertyDecl *FD);
void VisitIndirectFieldDecl(IndirectFieldDecl *FD);
RedeclarableResult VisitVarDeclImpl(VarDecl *D);
void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); }
void VisitImplicitParamDecl(ImplicitParamDecl *PD);
void VisitParmVarDecl(ParmVarDecl *PD);
void VisitDecompositionDecl(DecompositionDecl *DD);
void VisitBindingDecl(BindingDecl *BD);
void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D);
DeclID VisitTemplateDecl(TemplateDecl *D);
RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D);
void VisitClassTemplateDecl(ClassTemplateDecl *D);
void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D);
void VisitVarTemplateDecl(VarTemplateDecl *D);
void VisitFunctionTemplateDecl(FunctionTemplateDecl *D);
void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D);
void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
void VisitUsingDecl(UsingDecl *D);
void VisitUsingPackDecl(UsingPackDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(LinkageSpecDecl *D);
void VisitExportDecl(ExportDecl *D);
void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD);
void VisitImportDecl(ImportDecl *D);
void VisitAccessSpecDecl(AccessSpecDecl *D);
void VisitFriendDecl(FriendDecl *D);
void VisitFriendTemplateDecl(FriendTemplateDecl *D);
void VisitStaticAssertDecl(StaticAssertDecl *D);
void VisitBlockDecl(BlockDecl *BD);
void VisitCapturedDecl(CapturedDecl *CD);
void VisitEmptyDecl(EmptyDecl *D);
std::pair<uint64_t, uint64_t> VisitDeclContext(DeclContext *DC);
template<typename T>
RedeclarableResult VisitRedeclarable(Redeclarable<T> *D);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeMergeable(Mergeable<T> *D);
void mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl);
ObjCTypeParamList *ReadObjCTypeParamList();
// FIXME: Reorder according to DeclNodes.td?
void VisitObjCMethodDecl(ObjCMethodDecl *D);
void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D);
void VisitObjCContainerDecl(ObjCContainerDecl *D);
void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
void VisitObjCIvarDecl(ObjCIvarDecl *D);
void VisitObjCProtocolDecl(ObjCProtocolDecl *D);
void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D);
void VisitObjCCategoryDecl(ObjCCategoryDecl *D);
void VisitObjCImplDecl(ObjCImplDecl *D);
void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D);
void VisitObjCImplementationDecl(ObjCImplementationDecl *D);
void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D);
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
};
} // end namespace clang
namespace {
/// Iterator over the redeclarations of a declaration that have already
/// been merged into the same redeclaration chain.
template<typename DeclT>
class MergedRedeclIterator {
DeclT *Start, *Canonical, *Current;
public:
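// A default-constructed iterator is the end sentinel; operator!= below
// compares only Current, so Start and Canonical are left unset here.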
MergedRedeclIterator() : Current(nullptr) {}
MergedRedeclIterator(DeclT *Start)
: Start(Start), Canonical(nullptr), Current(Start) {}
DeclT *operator*() { return Current; }
MergedRedeclIterator &operator++() {
if (Current->isFirstDecl()) {
Canonical = Current;
Current = Current->getMostRecentDecl();
} else
Current = Current->getPreviousDecl();
// If we started in the merged portion, we'll reach our start position
// eventually. Otherwise, we'll never reach it, but the second declaration
// we reached was the canonical declaration, so stop when we see that one
// again.
if (Current == Start || Current == Canonical)
Current = nullptr;
return *this;
}
friend bool operator!=(const MergedRedeclIterator &A,
const MergedRedeclIterator &B) {
return A.Current != B.Current;
}
};
} // end anonymous namespace
template <typename DeclT>
static llvm::iterator_range<MergedRedeclIterator<DeclT>>
merged_redecls(DeclT *D) {
return llvm::make_range(MergedRedeclIterator<DeclT>(D),
MergedRedeclIterator<DeclT>());
}
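// Example use (a sketch mirroring VisitEnumDecl below), scanning the
// already-merged chain of a hypothetical EnumDecl *ED for a local
// definition:
//
//   for (auto *D : merged_redecls(ED->getCanonicalDecl()))
//     if (!D->isFromASTFile() && D->isCompleteDefinition())
//       /* found the local definition */;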
uint64_t ASTDeclReader::GetCurrentCursorOffset() {
return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset;
}
void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) {
if (Record.readInt())
Reader.BodySource[FD] = Loc.F->Kind == ModuleKind::MK_MainFile;
if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
CD->NumCtorInitializers = Record.readInt();
if (CD->NumCtorInitializers)
CD->CtorInitializers = ReadGlobalOffset();
}
// Store the offset of the body so we can lazily load it later.
Reader.PendingBodies[FD] = GetCurrentCursorOffset();
HasPendingBody = true;
}
void ASTDeclReader::Visit(Decl *D) {
DeclVisitor<ASTDeclReader, void>::Visit(D);
// At this point we have deserialized and merged the decl and it is safe to
// update its canonical decl to signal that the entire entity is used.
D->getCanonicalDecl()->Used |= IsDeclMarkedUsed;
IsDeclMarkedUsed = false;
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
if (DD->DeclInfo) {
DeclaratorDecl::ExtInfo *Info =
DD->DeclInfo.get<DeclaratorDecl::ExtInfo *>();
Info->TInfo = GetTypeSourceInfo();
} else {
DD->DeclInfo = GetTypeSourceInfo();
}
}
if (TypeDecl *TD = dyn_cast<TypeDecl>(D)) {
// We have a fully initialized TypeDecl. Read its type now.
TD->setTypeForDecl(Reader.GetType(TypeIDForTypeDecl).getTypePtrOrNull());
// If this is a tag declaration with a typedef name for linkage, it's safe
// to load that typedef now.
if (NamedDeclForTagDecl)
cast<TagDecl>(D)->TypedefNameDeclOrQualifier =
cast<TypedefNameDecl>(Reader.GetDecl(NamedDeclForTagDecl));
} else if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
// If we have a fully initialized TypeDecl, we can safely read its type now.
ID->TypeForDecl = Reader.GetType(TypeIDForTypeDecl).getTypePtrOrNull();
} else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
// FunctionDecl's body was written last after all other Stmts/Exprs.
// We only read it if FD doesn't already have a body (e.g., from another
// module).
// FIXME: Can we diagnose ODR violations somehow?
if (Record.readInt())
ReadFunctionDefinition(FD);
}
}
void ASTDeclReader::VisitDecl(Decl *D) {
if (D->isTemplateParameter() || D->isTemplateParameterPack() ||
isa<ParmVarDecl>(D)) {
// We don't want to deserialize the DeclContext of a template
// parameter or of a parameter of a function template immediately. These
// entities might be used in the formulation of its DeclContext (for
// example, a function parameter can be used in decltype() in trailing
// return type of the function). Use the translation unit DeclContext as a
// placeholder.
GlobalDeclID SemaDCIDForTemplateParmDecl = ReadDeclID();
GlobalDeclID LexicalDCIDForTemplateParmDecl = ReadDeclID();
if (!LexicalDCIDForTemplateParmDecl)
LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl;
Reader.addPendingDeclContextInfo(D,
SemaDCIDForTemplateParmDecl,
LexicalDCIDForTemplateParmDecl);
D->setDeclContext(Reader.getContext().getTranslationUnitDecl());
} else {
DeclContext *SemaDC = ReadDeclAs<DeclContext>();
DeclContext *LexicalDC = ReadDeclAs<DeclContext>();
if (!LexicalDC)
LexicalDC = SemaDC;
DeclContext *MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC);
// Avoid calling setLexicalDeclContext() directly because it uses
// Decl::getASTContext() internally, which is unsafe during deserialization.
D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC,
Reader.getContext());
}
D->setLocation(ThisDeclLoc);
D->setInvalidDecl(Record.readInt());
if (Record.readInt()) { // hasAttrs
AttrVec Attrs;
Record.readAttributes(Attrs);
// Avoid calling setAttrs() directly because it uses Decl::getASTContext()
// internally, which is unsafe during deserialization.
D->setAttrsImpl(Attrs, Reader.getContext());
}
D->setImplicit(Record.readInt());
D->Used = Record.readInt();
IsDeclMarkedUsed |= D->Used;
D->setReferenced(Record.readInt());
D->setTopLevelDeclInObjCContainer(Record.readInt());
D->setAccess((AccessSpecifier)Record.readInt());
D->FromASTFile = true;
bool ModulePrivate = Record.readInt();
// Determine whether this declaration is part of a (sub)module. If so, it
// may not yet be visible.
if (unsigned SubmoduleID = readSubmoduleID()) {
// Store the owning submodule ID in the declaration.
D->setModuleOwnershipKind(
ModulePrivate ? Decl::ModuleOwnershipKind::ModulePrivate
: Decl::ModuleOwnershipKind::VisibleWhenImported);
D->setOwningModuleID(SubmoduleID);
if (ModulePrivate) {
// Module-private declarations are never visible, so there is no work to
// do.
} else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
// If local visibility is being tracked, this declaration will become
// hidden and visible as the owning module does.
} else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
// Mark the declaration as visible when its owning module becomes visible.
if (Owner->NameVisibility == Module::AllVisible)
D->setVisibleDespiteOwningModule();
else
Reader.HiddenNamesMap[Owner].push_back(D);
}
} else if (ModulePrivate) {
D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate);
}
}
void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
VisitDecl(D);
D->setLocation(ReadSourceLocation());
D->CommentKind = (PragmaMSCommentKind)Record.readInt();
std::string Arg = ReadString();
memcpy(D->getTrailingObjects<char>(), Arg.data(), Arg.size());
D->getTrailingObjects<char>()[Arg.size()] = '\0';
}
void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) {
VisitDecl(D);
D->setLocation(ReadSourceLocation());
std::string Name = ReadString();
memcpy(D->getTrailingObjects<char>(), Name.data(), Name.size());
D->getTrailingObjects<char>()[Name.size()] = '\0';
D->ValueStart = Name.size() + 1;
std::string Value = ReadString();
memcpy(D->getTrailingObjects<char>() + D->ValueStart, Value.data(),
Value.size());
D->getTrailingObjects<char>()[D->ValueStart + Value.size()] = '\0';
}
void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) {
llvm_unreachable("Translation units are not serialized");
}
void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) {
VisitDecl(ND);
ND->setDeclName(Record.readDeclarationName());
AnonymousDeclNumber = Record.readInt();
}
void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) {
VisitNamedDecl(TD);
TD->setLocStart(ReadSourceLocation());
// Delay type reading until after we have fully initialized the decl.
TypeIDForTypeDecl = Record.getGlobalTypeID(Record.readInt());
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TypeSourceInfo *TInfo = GetTypeSourceInfo();
if (Record.readInt()) { // isModed
QualType modedT = Record.readType();
TD->setModedTypeSourceInfo(TInfo, modedT);
} else
TD->setTypeSourceInfo(TInfo);
// Read and discard the declaration for which this is a typedef name for
// linkage, if it exists. We cannot rely on our type to pull in this decl,
// because it might have been merged with a type from another module and
// thus might not refer to our version of the declaration.
ReadDecl();
return Redecl;
}
void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
mergeRedeclarable(TD, Redecl);
}
void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
if (auto *Template = ReadDeclAs<TypeAliasTemplateDecl>())
// Merged when we merge the template.
TD->setDescribedAliasTemplate(Template);
else
mergeRedeclarable(TD, Redecl);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TD->IdentifierNamespace = Record.readInt();
TD->setTagKind((TagDecl::TagKind)Record.readInt());
if (!isa<CXXRecordDecl>(TD))
TD->setCompleteDefinition(Record.readInt());
TD->setEmbeddedInDeclarator(Record.readInt());
TD->setFreeStanding(Record.readInt());
TD->setCompleteDefinitionRequired(Record.readInt());
TD->setBraceRange(ReadSourceRange());
switch (Record.readInt()) {
case 0:
break;
case 1: { // ExtInfo
TagDecl::ExtInfo *Info = new (Reader.getContext()) TagDecl::ExtInfo();
ReadQualifierInfo(*Info);
TD->TypedefNameDeclOrQualifier = Info;
break;
}
case 2: // TypedefNameForAnonDecl
NamedDeclForTagDecl = ReadDeclID();
TypedefNameForLinkage = Record.getIdentifierInfo();
break;
default:
llvm_unreachable("unexpected tag info kind");
}
if (!isa<CXXRecordDecl>(TD))
mergeRedeclarable(TD, Redecl);
return Redecl;
}
void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
VisitTagDecl(ED);
if (TypeSourceInfo *TI = GetTypeSourceInfo())
ED->setIntegerTypeSourceInfo(TI);
else
ED->setIntegerType(Record.readType());
ED->setPromotionType(Record.readType());
ED->setNumPositiveBits(Record.readInt());
ED->setNumNegativeBits(Record.readInt());
ED->IsScoped = Record.readInt();
ED->IsScopedUsingClassTag = Record.readInt();
ED->IsFixed = Record.readInt();
// If this is a definition subject to the ODR, and we already have a
// definition, merge this one into it.
if (ED->IsCompleteDefinition &&
Reader.getContext().getLangOpts().Modules &&
Reader.getContext().getLangOpts().CPlusPlus) {
EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()];
if (!OldDef) {
// This is the first time we've seen an imported definition. Look for a
// local definition before deciding that we are the first definition.
for (auto *D : merged_redecls(ED->getCanonicalDecl())) {
if (!D->isFromASTFile() && D->isCompleteDefinition()) {
OldDef = D;
break;
}
}
}
if (OldDef) {
Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
ED->IsCompleteDefinition = false;
Reader.mergeDefinitionVisibility(OldDef, ED);
} else {
OldDef = ED;
}
}
if (EnumDecl *InstED = ReadDeclAs<EnumDecl>()) {
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK);
ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
RedeclarableResult Redecl = VisitTagDecl(RD);
RD->setHasFlexibleArrayMember(Record.readInt());
RD->setAnonymousStructOrUnion(Record.readInt());
RD->setHasObjectMember(Record.readInt());
RD->setHasVolatileMember(Record.readInt());
return Redecl;
}
void ASTDeclReader::VisitValueDecl(ValueDecl *VD) {
VisitNamedDecl(VD);
VD->setType(Record.readType());
}
void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) {
VisitValueDecl(ECD);
if (Record.readInt())
ECD->setInitExpr(Record.readExpr());
ECD->setInitVal(Record.readAPSInt());
mergeMergeable(ECD);
}
void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) {
VisitValueDecl(DD);
DD->setInnerLocStart(ReadSourceLocation());
if (Record.readInt()) { // hasExtInfo
DeclaratorDecl::ExtInfo *Info
= new (Reader.getContext()) DeclaratorDecl::ExtInfo();
ReadQualifierInfo(*Info);
DD->DeclInfo = Info;
}
}
void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
RedeclarableResult Redecl = VisitRedeclarable(FD);
VisitDeclaratorDecl(FD);
ReadDeclarationNameLoc(FD->DNLoc, FD->getDeclName());
FD->IdentifierNamespace = Record.readInt();
// FunctionDecl's body is handled last at ASTDeclReader::Visit,
// after everything else is read.
FD->SClass = (StorageClass)Record.readInt();
FD->IsInline = Record.readInt();
FD->IsInlineSpecified = Record.readInt();
FD->IsExplicitSpecified = Record.readInt();
FD->IsVirtualAsWritten = Record.readInt();
FD->IsPure = Record.readInt();
FD->HasInheritedPrototype = Record.readInt();
FD->HasWrittenPrototype = Record.readInt();
FD->IsDeleted = Record.readInt();
FD->IsTrivial = Record.readInt();
FD->IsDefaulted = Record.readInt();
FD->IsExplicitlyDefaulted = Record.readInt();
FD->HasImplicitReturnZero = Record.readInt();
FD->IsConstexpr = Record.readInt();
FD->UsesSEHTry = Record.readInt();
FD->HasSkippedBody = Record.readInt();
FD->IsLateTemplateParsed = Record.readInt();
FD->setCachedLinkage(Linkage(Record.readInt()));
FD->EndRangeLoc = ReadSourceLocation();
switch ((FunctionDecl::TemplatedKind)Record.readInt()) {
case FunctionDecl::TK_NonTemplate:
mergeRedeclarable(FD, Redecl);
break;
case FunctionDecl::TK_FunctionTemplate:
// Merged when we merge the template.
FD->setDescribedFunctionTemplate(ReadDeclAs<FunctionTemplateDecl>());
break;
case FunctionDecl::TK_MemberSpecialization: {
FunctionDecl *InstFD = ReadDeclAs<FunctionDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK);
FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
mergeRedeclarable(FD, Redecl);
break;
}
case FunctionDecl::TK_FunctionTemplateSpecialization: {
FunctionTemplateDecl *Template = ReadDeclAs<FunctionTemplateDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
// Template arguments.
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
// Template args as written.
SmallVector<TemplateArgumentLoc, 8> TemplArgLocs;
SourceLocation LAngleLoc, RAngleLoc;
bool HasTemplateArgumentsAsWritten = Record.readInt();
if (HasTemplateArgumentsAsWritten) {
unsigned NumTemplateArgLocs = Record.readInt();
TemplArgLocs.reserve(NumTemplateArgLocs);
for (unsigned i=0; i != NumTemplateArgLocs; ++i)
TemplArgLocs.push_back(Record.readTemplateArgumentLoc());
LAngleLoc = ReadSourceLocation();
RAngleLoc = ReadSourceLocation();
}
SourceLocation POI = ReadSourceLocation();
ASTContext &C = Reader.getContext();
TemplateArgumentList *TemplArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i=0, e = TemplArgLocs.size(); i != e; ++i)
TemplArgsInfo.addArgument(TemplArgLocs[i]);
FunctionTemplateSpecializationInfo *FTInfo
= FunctionTemplateSpecializationInfo::Create(C, FD, Template, TSK,
TemplArgList,
HasTemplateArgumentsAsWritten ? &TemplArgsInfo
: nullptr,
POI);
FD->TemplateOrSpecialization = FTInfo;
if (FD->isCanonicalDecl()) { // If canonical, add to the template's set.
// The template that contains the specializations set. It's not safe to
// use getCanonicalDecl on Template since it may still be initializing.
FunctionTemplateDecl *CanonTemplate = ReadDeclAs<FunctionTemplateDecl>();
// Get the InsertPos by FindNodeOrInsertPos() instead of calling
// InsertNode(FTInfo) directly to avoid the getASTContext() call in
// FunctionTemplateSpecializationInfo's Profile().
// We avoid getASTContext because a decl in the parent hierarchy may
// be initializing.
llvm::FoldingSetNodeID ID;
FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C);
void *InsertPos = nullptr;
FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr();
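// FindNodeOrInsertPos returns the existing node when one is already in
// the folding set; otherwise it returns null and fills InsertPos so the
// new node can be inserted without re-profiling.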
FunctionTemplateSpecializationInfo *ExistingInfo =
CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos);
if (InsertPos)
CommonPtr->Specializations.InsertNode(FTInfo, InsertPos);
else {
assert(Reader.getContext().getLangOpts().Modules &&
"already deserialized this template specialization");
mergeRedeclarable(FD, ExistingInfo->Function, Redecl);
}
}
break;
}
case FunctionDecl::TK_DependentFunctionTemplateSpecialization: {
// Templates.
UnresolvedSet<8> TemplDecls;
unsigned NumTemplates = Record.readInt();
while (NumTemplates--)
TemplDecls.addDecl(ReadDeclAs<NamedDecl>());
// Templates args.
TemplateArgumentListInfo TemplArgs;
unsigned NumArgs = Record.readInt();
while (NumArgs--)
TemplArgs.addArgument(Record.readTemplateArgumentLoc());
TemplArgs.setLAngleLoc(ReadSourceLocation());
TemplArgs.setRAngleLoc(ReadSourceLocation());
FD->setDependentTemplateSpecialization(Reader.getContext(),
TemplDecls, TemplArgs);
// These are not merged; we don't need to merge redeclarations of dependent
// template friends.
break;
}
}
// Read in the parameters.
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
FD->setParams(Reader.getContext(), Params);
}
void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
VisitNamedDecl(MD);
if (Record.readInt()) {
// Load the body on-demand. Most clients won't care, because method
// definitions rarely show up in headers.
Reader.PendingBodies[MD] = GetCurrentCursorOffset();
HasPendingBody = true;
MD->setSelfDecl(ReadDeclAs<ImplicitParamDecl>());
MD->setCmdDecl(ReadDeclAs<ImplicitParamDecl>());
}
MD->setInstanceMethod(Record.readInt());
MD->setVariadic(Record.readInt());
MD->setPropertyAccessor(Record.readInt());
MD->setDefined(Record.readInt());
MD->IsOverriding = Record.readInt();
MD->HasSkippedBody = Record.readInt();
MD->IsRedeclaration = Record.readInt();
MD->HasRedeclaration = Record.readInt();
if (MD->HasRedeclaration)
Reader.getContext().setObjCMethodRedeclaration(MD,
ReadDeclAs<ObjCMethodDecl>());
MD->setDeclImplementation((ObjCMethodDecl::ImplementationControl)Record.readInt());
MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt());
MD->SetRelatedResultType(Record.readInt());
MD->setReturnType(Record.readType());
MD->setReturnTypeSourceInfo(GetTypeSourceInfo());
MD->DeclEndLoc = ReadSourceLocation();
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
MD->SelLocsKind = Record.readInt();
unsigned NumStoredSelLocs = Record.readInt();
SmallVector<SourceLocation, 16> SelLocs;
SelLocs.reserve(NumStoredSelLocs);
for (unsigned i = 0; i != NumStoredSelLocs; ++i)
SelLocs.push_back(ReadSourceLocation());
MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs);
}
void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
VisitTypedefNameDecl(D);
D->Variance = Record.readInt();
D->Index = Record.readInt();
D->VarianceLoc = ReadSourceLocation();
D->ColonLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) {
VisitNamedDecl(CD);
CD->setAtStartLoc(ReadSourceLocation());
CD->setAtEndRange(ReadSourceRange());
}
ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() {
unsigned numParams = Record.readInt();
if (numParams == 0)
return nullptr;
SmallVector<ObjCTypeParamDecl *, 4> typeParams;
typeParams.reserve(numParams);
for (unsigned i = 0; i != numParams; ++i) {
auto typeParam = ReadDeclAs<ObjCTypeParamDecl>();
if (!typeParam)
return nullptr;
typeParams.push_back(typeParam);
}
SourceLocation lAngleLoc = ReadSourceLocation();
SourceLocation rAngleLoc = ReadSourceLocation();
return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc,
typeParams, rAngleLoc);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCInterfaceDecl::DefinitionData &Data) {
// Read the superclass.
Data.SuperClassTInfo = GetTypeSourceInfo();
Data.EndLoc = ReadSourceLocation();
Data.HasDesignatedInitializers = Record.readInt();
// Read the directly referenced protocols and their SourceLocations.
unsigned NumProtocols = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> Protocols;
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
ProtoLocs.push_back(ReadSourceLocation());
Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(),
Reader.getContext());
// Read the transitive closure of protocols referenced by this class.
NumProtocols = Record.readInt();
Protocols.clear();
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(ReadDeclAs<ObjCProtocolDecl>());
Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols,
Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD) {
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) {
RedeclarableResult Redecl = VisitRedeclarable(ID);
VisitObjCContainerDecl(ID);
TypeIDForTypeDecl = Record.getGlobalTypeID(Record.readInt());
mergeRedeclarable(ID, Redecl);
ID->TypeParamList = ReadObjCTypeParamList();
if (Record.readInt()) {
// Read the definition.
ID->allocateDefinitionData();
ReadObjCDefinitionData(ID->data());
ObjCInterfaceDecl *Canon = ID->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(ID->data()));
ID->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
ID->getCanonicalDecl()->Data = ID->Data;
// We will rebuild this list lazily.
ID->setIvarList(nullptr);
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(ID);
// Note that we've loaded this Objective-C class.
Reader.ObjCClassesLoaded.push_back(ID);
} else {
ID->Data = ID->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) {
VisitFieldDecl(IVD);
IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt());
// This field will be built lazily.
IVD->setNextIvar(nullptr);
bool synth = Record.readInt();
IVD->setSynthesize(synth);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCProtocolDecl::DefinitionData &Data) {
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(ReadSourceLocation());
Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs,
ProtoLocs.data(), Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD) {
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
RedeclarableResult Redecl = VisitRedeclarable(PD);
VisitObjCContainerDecl(PD);
mergeRedeclarable(PD, Redecl);
if (Record.readInt()) {
// Read the definition.
PD->allocateDefinitionData();
ReadObjCDefinitionData(PD->data());
ObjCProtocolDecl *Canon = PD->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(PD->data()));
PD->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
PD->getCanonicalDecl()->Data = PD->Data;
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(PD);
} else {
PD->Data = PD->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) {
VisitFieldDecl(FD);
}
void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) {
VisitObjCContainerDecl(CD);
CD->setCategoryNameLoc(ReadSourceLocation());
CD->setIvarLBraceLoc(ReadSourceLocation());
CD->setIvarRBraceLoc(ReadSourceLocation());
// Note that this category has been deserialized. We do this before
// deserializing the interface declaration, so that it will consider this
// category.
Reader.CategoriesDeserialized.insert(CD);
CD->ClassInterface = ReadDeclAs<ObjCInterfaceDecl>();
CD->TypeParamList = ReadObjCTypeParamList();
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(ReadSourceLocation());
CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(),
Reader.getContext());
}
void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) {
VisitNamedDecl(CAD);
CAD->setClassInterface(ReadDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
D->setAtLoc(ReadSourceLocation());
D->setLParenLoc(ReadSourceLocation());
QualType T = Record.readType();
TypeSourceInfo *TSI = GetTypeSourceInfo();
D->setType(T, TSI);
D->setPropertyAttributes(
(ObjCPropertyDecl::PropertyAttributeKind)Record.readInt());
D->setPropertyAttributesAsWritten(
(ObjCPropertyDecl::PropertyAttributeKind)Record.readInt());
D->setPropertyImplementation(
(ObjCPropertyDecl::PropertyControl)Record.readInt());
DeclarationName GetterName = Record.readDeclarationName();
SourceLocation GetterLoc = ReadSourceLocation();
D->setGetterName(GetterName.getObjCSelector(), GetterLoc);
DeclarationName SetterName = Record.readDeclarationName();
SourceLocation SetterLoc = ReadSourceLocation();
D->setSetterName(SetterName.getObjCSelector(), SetterLoc);
D->setGetterMethodDecl(ReadDeclAs<ObjCMethodDecl>());
D->setSetterMethodDecl(ReadDeclAs<ObjCMethodDecl>());
D->setPropertyIvarDecl(ReadDeclAs<ObjCIvarDecl>());
}
void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) {
VisitObjCContainerDecl(D);
D->setClassInterface(ReadDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
VisitObjCImplDecl(D);
D->CategoryNameLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
VisitObjCImplDecl(D);
D->setSuperClass(ReadDeclAs<ObjCInterfaceDecl>());
D->SuperLoc = ReadSourceLocation();
D->setIvarLBraceLoc(ReadSourceLocation());
D->setIvarRBraceLoc(ReadSourceLocation());
D->setHasNonZeroConstructors(Record.readInt());
D->setHasDestructors(Record.readInt());
D->NumIvarInitializers = Record.readInt();
if (D->NumIvarInitializers)
D->IvarInitializers = ReadGlobalOffset();
}
void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
VisitDecl(D);
D->setAtLoc(ReadSourceLocation());
D->setPropertyDecl(ReadDeclAs<ObjCPropertyDecl>());
D->PropertyIvarDecl = ReadDeclAs<ObjCIvarDecl>();
D->IvarLoc = ReadSourceLocation();
D->setGetterCXXConstructor(Record.readExpr());
D->setSetterCXXAssignment(Record.readExpr());
}
void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) {
VisitDeclaratorDecl(FD);
FD->Mutable = Record.readInt();
if (int BitWidthOrInitializer = Record.readInt()) {
FD->InitStorage.setInt(
static_cast<FieldDecl::InitStorageKind>(BitWidthOrInitializer - 1));
if (FD->InitStorage.getInt() == FieldDecl::ISK_CapturedVLAType) {
// Read captured variable length array.
FD->InitStorage.setPointer(Record.readType().getAsOpaquePtr());
} else {
FD->InitStorage.setPointer(Record.readExpr());
}
}
if (!FD->getDeclName()) {
if (FieldDecl *Tmpl = ReadDeclAs<FieldDecl>())
Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl);
}
mergeMergeable(FD);
}
void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) {
VisitDeclaratorDecl(PD);
PD->GetterId = Record.getIdentifierInfo();
PD->SetterId = Record.getIdentifierInfo();
}
void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) {
VisitValueDecl(FD);
FD->ChainingSize = Record.readInt();
assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2");
FD->Chaining = new (Reader.getContext()) NamedDecl*[FD->ChainingSize];
for (unsigned I = 0; I != FD->ChainingSize; ++I)
FD->Chaining[I] = ReadDeclAs<NamedDecl>();
mergeMergeable(FD);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
RedeclarableResult Redecl = VisitRedeclarable(VD);
VisitDeclaratorDecl(VD);
VD->VarDeclBits.SClass = (StorageClass)Record.readInt();
VD->VarDeclBits.TSCSpec = Record.readInt();
VD->VarDeclBits.InitStyle = Record.readInt();
if (!isa<ParmVarDecl>(VD)) {
VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition =
Record.readInt();
VD->NonParmVarDeclBits.ExceptionVar = Record.readInt();
VD->NonParmVarDeclBits.NRVOVariable = Record.readInt();
VD->NonParmVarDeclBits.CXXForRangeDecl = Record.readInt();
VD->NonParmVarDeclBits.ARCPseudoStrong = Record.readInt();
VD->NonParmVarDeclBits.IsInline = Record.readInt();
VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt();
VD->NonParmVarDeclBits.IsConstexpr = Record.readInt();
VD->NonParmVarDeclBits.IsInitCapture = Record.readInt();
VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record.readInt();
VD->NonParmVarDeclBits.ImplicitParamKind = Record.readInt();
}
Linkage VarLinkage = Linkage(Record.readInt());
VD->setCachedLinkage(VarLinkage);
// Reconstruct the one piece of the IdentifierNamespace that we need.
if (VD->getStorageClass() == SC_Extern && VarLinkage != NoLinkage &&
VD->getLexicalDeclContext()->isFunctionOrMethod())
VD->setLocalExternDecl();
if (uint64_t Val = Record.readInt()) {
VD->setInit(Record.readExpr());
if (Val > 1) { // 1 = has init (ICE unknown), 2 = init not ICE, 3 = init is ICE
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = Val == 3;
}
}
enum VarKind {
VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization
};
switch ((VarKind)Record.readInt()) {
case VarNotTemplate:
// Only true variables (not parameters or implicit parameters) can be
// merged; the other kinds are not really redeclarable at all.
if (!isa<ParmVarDecl>(VD) && !isa<ImplicitParamDecl>(VD) &&
!isa<VarTemplateSpecializationDecl>(VD))
mergeRedeclarable(VD, Redecl);
break;
case VarTemplate:
// Merged when we merge the template.
VD->setDescribedVarTemplate(ReadDeclAs<VarTemplateDecl>());
break;
case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo.
VarDecl *Tmpl = ReadDeclAs<VarDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK, POI);
mergeRedeclarable(VD, Redecl);
break;
}
}
return Redecl;
}
void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) {
VisitVarDecl(PD);
}
void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) {
VisitVarDecl(PD);
unsigned isObjCMethodParam = Record.readInt();
unsigned scopeDepth = Record.readInt();
unsigned scopeIndex = Record.readInt();
unsigned declQualifier = Record.readInt();
if (isObjCMethodParam) {
assert(scopeDepth == 0);
PD->setObjCMethodScopeInfo(scopeIndex);
PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier;
} else {
PD->setScopeInfo(scopeDepth, scopeIndex);
}
PD->ParmVarDeclBits.IsKNRPromoted = Record.readInt();
PD->ParmVarDeclBits.HasInheritedDefaultArg = Record.readInt();
if (Record.readInt()) // hasUninstantiatedDefaultArg.
PD->setUninstantiatedDefaultArg(Record.readExpr());
// FIXME: If this is a redeclaration of a function from another module, handle
// inheritance of default arguments.
}
void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) {
VisitVarDecl(DD);
BindingDecl **BDs = DD->getTrailingObjects<BindingDecl*>();
for (unsigned I = 0; I != DD->NumBindings; ++I)
BDs[I] = ReadDeclAs<BindingDecl>();
}
void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) {
VisitValueDecl(BD);
BD->Binding = Record.readExpr();
}
void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) {
VisitDecl(AD);
AD->setAsmString(cast<StringLiteral>(Record.readExpr()));
AD->setRParenLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) {
VisitDecl(BD);
BD->setBody(cast_or_null<CompoundStmt>(Record.readStmt()));
BD->setSignatureAsWritten(GetTypeSourceInfo());
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
BD->setParams(Params);
BD->setIsVariadic(Record.readInt());
BD->setBlockMissingReturnType(Record.readInt());
BD->setIsConversionFromLambda(Record.readInt());
bool capturesCXXThis = Record.readInt();
unsigned numCaptures = Record.readInt();
SmallVector<BlockDecl::Capture, 16> captures;
captures.reserve(numCaptures);
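// Each capture is serialized as a VarDecl reference followed by a flags
// word: bit 0 = captured by reference, bit 1 = nested capture, and bit 2
// signals that a copy expression follows in the record.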
for (unsigned i = 0; i != numCaptures; ++i) {
VarDecl *decl = ReadDeclAs<VarDecl>();
unsigned flags = Record.readInt();
bool byRef = (flags & 1);
bool nested = (flags & 2);
Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr);
captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr));
}
BD->setCaptures(Reader.getContext(), captures, capturesCXXThis);
}
void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) {
VisitDecl(CD);
unsigned ContextParamPos = Record.readInt();
CD->setNothrow(Record.readInt() != 0);
// Body is set by VisitCapturedStmt.
for (unsigned I = 0; I < CD->NumParams; ++I) {
if (I != ContextParamPos)
CD->setParam(I, ReadDeclAs<ImplicitParamDecl>());
else
CD->setContextParam(I, ReadDeclAs<ImplicitParamDecl>());
}
}
void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
VisitDecl(D);
D->setLanguage((LinkageSpecDecl::LanguageIDs)Record.readInt());
D->setExternLoc(ReadSourceLocation());
D->setRBraceLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitExportDecl(ExportDecl *D) {
VisitDecl(D);
D->RBraceLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitLabelDecl(LabelDecl *D) {
VisitNamedDecl(D);
D->setLocStart(ReadSourceLocation());
}
void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->setInline(Record.readInt());
D->LocStart = ReadSourceLocation();
D->RBraceLoc = ReadSourceLocation();
// Defer loading the anonymous namespace until we've finished merging
// this namespace; loading it might load a later declaration of the
// same namespace, and we have an invariant that older declarations
// get merged before newer ones try to merge.
GlobalDeclID AnonNamespace = 0;
if (Redecl.getFirstID() == ThisDeclID) {
AnonNamespace = ReadDeclID();
} else {
// Link this namespace back to the first declaration, which has already
// been deserialized.
D->AnonOrFirstNamespaceAndInline.setPointer(D->getFirstDecl());
}
mergeRedeclarable(D, Redecl);
if (AnonNamespace) {
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
NamespaceDecl *Anon = cast<NamespaceDecl>(Reader.GetDecl(AnonNamespace));
if (!Record.isModule())
D->setAnonymousNamespace(Anon);
}
}
void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->NamespaceLoc = ReadSourceLocation();
D->IdentLoc = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->Namespace = ReadDeclAs<NamedDecl>();
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitUsingDecl(UsingDecl *D) {
VisitNamedDecl(D);
D->setUsingLoc(ReadSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
ReadDeclarationNameLoc(D->DNLoc, D->getDeclName());
D->FirstUsingShadow.setPointer(ReadDeclAs<UsingShadowDecl>());
D->setTypename(Record.readInt());
if (NamedDecl *Pattern = ReadDeclAs<NamedDecl>())
Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern);
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) {
VisitNamedDecl(D);
D->InstantiatedFrom = ReadDeclAs<NamedDecl>();
NamedDecl **Expansions = D->getTrailingObjects<NamedDecl*>();
for (unsigned I = 0; I != D->NumExpansions; ++I)
Expansions[I] = ReadDeclAs<NamedDecl>();
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->setTargetDecl(ReadDeclAs<NamedDecl>());
D->UsingOrNextShadow = ReadDeclAs<NamedDecl>();
UsingShadowDecl *Pattern = ReadDeclAs<UsingShadowDecl>();
if (Pattern)
Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern);
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitConstructorUsingShadowDecl(
ConstructorUsingShadowDecl *D) {
VisitUsingShadowDecl(D);
D->NominatedBaseClassShadowDecl = ReadDeclAs<ConstructorUsingShadowDecl>();
D->ConstructedBaseClassShadowDecl = ReadDeclAs<ConstructorUsingShadowDecl>();
D->IsVirtual = Record.readInt();
}
void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
VisitNamedDecl(D);
D->UsingLoc = ReadSourceLocation();
D->NamespaceLoc = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->NominatedNamespace = ReadDeclAs<NamedDecl>();
D->CommonAncestor = ReadDeclAs<DeclContext>();
}
void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) {
VisitValueDecl(D);
D->setUsingLoc(ReadSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
ReadDeclarationNameLoc(D->DNLoc, D->getDeclName());
D->EllipsisLoc = ReadSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
VisitTypeDecl(D);
D->TypenameLocation = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->EllipsisLoc = ReadSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::ReadCXXDefinitionData(
struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D) {
// Note: the caller has deserialized the IsLambda bit already.
Data.UserDeclaredConstructor = Record.readInt();
Data.UserDeclaredSpecialMembers = Record.readInt();
Data.Aggregate = Record.readInt();
Data.PlainOldData = Record.readInt();
Data.Empty = Record.readInt();
Data.Polymorphic = Record.readInt();
Data.Abstract = Record.readInt();
Data.IsStandardLayout = Record.readInt();
Data.HasNoNonEmptyBases = Record.readInt();
Data.HasPrivateFields = Record.readInt();
Data.HasProtectedFields = Record.readInt();
Data.HasPublicFields = Record.readInt();
Data.HasMutableFields = Record.readInt();
Data.HasVariantMembers = Record.readInt();
Data.HasOnlyCMembers = Record.readInt();
Data.HasInClassInitializer = Record.readInt();
Data.HasUninitializedReferenceMember = Record.readInt();
Data.HasUninitializedFields = Record.readInt();
Data.HasInheritedConstructor = Record.readInt();
Data.HasInheritedAssignment = Record.readInt();
Data.NeedOverloadResolutionForCopyConstructor = Record.readInt();
Data.NeedOverloadResolutionForMoveConstructor = Record.readInt();
Data.NeedOverloadResolutionForMoveAssignment = Record.readInt();
Data.NeedOverloadResolutionForDestructor = Record.readInt();
Data.DefaultedCopyConstructorIsDeleted = Record.readInt();
Data.DefaultedMoveConstructorIsDeleted = Record.readInt();
Data.DefaultedMoveAssignmentIsDeleted = Record.readInt();
Data.DefaultedDestructorIsDeleted = Record.readInt();
Data.HasTrivialSpecialMembers = Record.readInt();
Data.DeclaredNonTrivialSpecialMembers = Record.readInt();
Data.HasIrrelevantDestructor = Record.readInt();
Data.HasConstexprNonCopyMoveConstructor = Record.readInt();
Data.HasDefaultedDefaultConstructor = Record.readInt();
Data.CanPassInRegisters = Record.readInt();
Data.DefaultedDefaultConstructorIsConstexpr = Record.readInt();
Data.HasConstexprDefaultConstructor = Record.readInt();
Data.HasNonLiteralTypeFieldsOrBases = Record.readInt();
Data.ComputedVisibleConversions = Record.readInt();
Data.UserProvidedDefaultConstructor = Record.readInt();
Data.DeclaredSpecialMembers = Record.readInt();
Data.ImplicitCopyConstructorCanHaveConstParamForVBase = Record.readInt();
Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase = Record.readInt();
Data.ImplicitCopyAssignmentHasConstParam = Record.readInt();
Data.HasDeclaredCopyConstructorWithConstParam = Record.readInt();
Data.HasDeclaredCopyAssignmentWithConstParam = Record.readInt();
Data.ODRHash = Record.readInt();
Data.HasODRHash = true;
if (Record.readInt()) {
Reader.BodySource[D] = Loc.F->Kind == ModuleKind::MK_MainFile
? ExternalASTSource::EK_Never
: ExternalASTSource::EK_Always;
}
Data.NumBases = Record.readInt();
if (Data.NumBases)
Data.Bases = ReadGlobalOffset();
Data.NumVBases = Record.readInt();
if (Data.NumVBases)
Data.VBases = ReadGlobalOffset();
Record.readUnresolvedSet(Data.Conversions);
Record.readUnresolvedSet(Data.VisibleConversions);
assert(Data.Definition && "Data.Definition should be already set!");
Data.FirstFriend = ReadDeclID();
if (Data.IsLambda) {
typedef LambdaCapture Capture;
CXXRecordDecl::LambdaDefinitionData &Lambda
= static_cast<CXXRecordDecl::LambdaDefinitionData &>(Data);
Lambda.Dependent = Record.readInt();
Lambda.IsGenericLambda = Record.readInt();
Lambda.CaptureDefault = Record.readInt();
Lambda.NumCaptures = Record.readInt();
Lambda.NumExplicitCaptures = Record.readInt();
Lambda.ManglingNumber = Record.readInt();
Lambda.ContextDecl = ReadDeclID();
Lambda.Captures = (Capture *)Reader.getContext().Allocate(
sizeof(Capture) * Lambda.NumCaptures);
Capture *ToCapture = Lambda.Captures;
Lambda.MethodTyInfo = GetTypeSourceInfo();
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
SourceLocation Loc = ReadSourceLocation();
bool IsImplicit = Record.readInt();
LambdaCaptureKind Kind = static_cast<LambdaCaptureKind>(Record.readInt());
switch (Kind) {
case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
*ToCapture++ = Capture(Loc, IsImplicit, Kind, nullptr, SourceLocation());
break;
case LCK_ByCopy:
case LCK_ByRef:
VarDecl *Var = ReadDeclAs<VarDecl>();
SourceLocation EllipsisLoc = ReadSourceLocation();
*ToCapture++ = Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc);
break;
}
}
}
}
void ASTDeclReader::MergeDefinitionData(
CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) {
assert(D->DefinitionData &&
"merging class definition into non-definition");
auto &DD = *D->DefinitionData;
if (DD.Definition != MergeDD.Definition) {
// Track that we merged the definitions.
Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition,
DD.Definition));
Reader.PendingDefinitions.erase(MergeDD.Definition);
MergeDD.Definition->IsCompleteDefinition = false;
Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition);
assert(Reader.Lookups.find(MergeDD.Definition) == Reader.Lookups.end() &&
"already loaded pending lookups for merged definition");
}
auto PFDI = Reader.PendingFakeDefinitionData.find(&DD);
if (PFDI != Reader.PendingFakeDefinitionData.end() &&
PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) {
// We faked up this definition data because we found a class for which we'd
// not yet loaded the definition. Replace it with the real thing now.
assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?");
PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded;
// Don't change which declaration is the definition; that is required
// to be invariant once we select it.
auto *Def = DD.Definition;
DD = std::move(MergeDD);
DD.Definition = Def;
return;
}
// FIXME: Move this out into a .def file?
bool DetectedOdrViolation = false;
#define OR_FIELD(Field) DD.Field |= MergeDD.Field;
#define MATCH_FIELD(Field) \
DetectedOdrViolation |= DD.Field != MergeDD.Field; \
OR_FIELD(Field)
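// OR_FIELD merges a flag permissively: the merged definition has it set if
// either definition did. MATCH_FIELD does the same but also records an ODR
// violation when the two definitions disagree.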
MATCH_FIELD(UserDeclaredConstructor)
MATCH_FIELD(UserDeclaredSpecialMembers)
MATCH_FIELD(Aggregate)
MATCH_FIELD(PlainOldData)
MATCH_FIELD(Empty)
MATCH_FIELD(Polymorphic)
MATCH_FIELD(Abstract)
MATCH_FIELD(IsStandardLayout)
MATCH_FIELD(HasNoNonEmptyBases)
MATCH_FIELD(HasPrivateFields)
MATCH_FIELD(HasProtectedFields)
MATCH_FIELD(HasPublicFields)
MATCH_FIELD(HasMutableFields)
MATCH_FIELD(HasVariantMembers)
MATCH_FIELD(HasOnlyCMembers)
MATCH_FIELD(HasInClassInitializer)
MATCH_FIELD(HasUninitializedReferenceMember)
MATCH_FIELD(HasUninitializedFields)
MATCH_FIELD(HasInheritedConstructor)
MATCH_FIELD(HasInheritedAssignment)
MATCH_FIELD(NeedOverloadResolutionForCopyConstructor)
MATCH_FIELD(NeedOverloadResolutionForMoveConstructor)
MATCH_FIELD(NeedOverloadResolutionForMoveAssignment)
MATCH_FIELD(NeedOverloadResolutionForDestructor)
MATCH_FIELD(DefaultedCopyConstructorIsDeleted)
MATCH_FIELD(DefaultedMoveConstructorIsDeleted)
MATCH_FIELD(DefaultedMoveAssignmentIsDeleted)
MATCH_FIELD(DefaultedDestructorIsDeleted)
OR_FIELD(HasTrivialSpecialMembers)
OR_FIELD(DeclaredNonTrivialSpecialMembers)
MATCH_FIELD(HasIrrelevantDestructor)
OR_FIELD(HasConstexprNonCopyMoveConstructor)
OR_FIELD(HasDefaultedDefaultConstructor)
MATCH_FIELD(CanPassInRegisters)
MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr)
OR_FIELD(HasConstexprDefaultConstructor)
MATCH_FIELD(HasNonLiteralTypeFieldsOrBases)
// ComputedVisibleConversions is handled below.
MATCH_FIELD(UserProvidedDefaultConstructor)
OR_FIELD(DeclaredSpecialMembers)
MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForVBase)
MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForNonVBase)
MATCH_FIELD(ImplicitCopyAssignmentHasConstParam)
OR_FIELD(HasDeclaredCopyConstructorWithConstParam)
OR_FIELD(HasDeclaredCopyAssignmentWithConstParam)
MATCH_FIELD(IsLambda)
#undef OR_FIELD
#undef MATCH_FIELD
if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases)
DetectedOdrViolation = true;
// FIXME: Issue a diagnostic if the base classes don't match when we come
// to lazily load them.
// FIXME: Issue a diagnostic if the list of conversion functions doesn't
// match when we come to lazily load them.
if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) {
DD.VisibleConversions = std::move(MergeDD.VisibleConversions);
DD.ComputedVisibleConversions = true;
}
// FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to
// lazily load it.
if (DD.IsLambda) {
// FIXME: ODR-checking for merging lambdas (this happens, for instance,
// when they occur within the body of a function template specialization).
}
if (D->getODRHash() != MergeDD.ODRHash) {
DetectedOdrViolation = true;
}
if (DetectedOdrViolation)
Reader.PendingOdrMergeFailures[DD.Definition].push_back(MergeDD.Definition);
}
void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) {
struct CXXRecordDecl::DefinitionData *DD;
ASTContext &C = Reader.getContext();
// Determine whether this is a lambda closure type, so that we can
// allocate the appropriate DefinitionData structure.
bool IsLambda = Record.readInt();
if (IsLambda)
DD = new (C) CXXRecordDecl::LambdaDefinitionData(D, nullptr, false, false,
LCD_None);
else
DD = new (C) struct CXXRecordDecl::DefinitionData(D);
ReadCXXDefinitionData(*DD, D);
// We might already have a definition for this record. This can happen either
// because we're reading an update record, or because we've already done some
// merging. Either way, just merge into it.
CXXRecordDecl *Canon = D->getCanonicalDecl();
if (Canon->DefinitionData) {
MergeDefinitionData(Canon, std::move(*DD));
D->DefinitionData = Canon->DefinitionData;
return;
}
// Mark this declaration as being a definition.
D->IsCompleteDefinition = true;
D->DefinitionData = DD;
// If this is not the first declaration or is an update record, we can have
// other redeclarations already. Make a note that we need to propagate the
// DefinitionData pointer onto them.
if (Update || Canon != D) {
Canon->DefinitionData = D->DefinitionData;
Reader.PendingDefinitions.insert(D);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) {
RedeclarableResult Redecl = VisitRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
enum CXXRecKind {
CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization
};
switch ((CXXRecKind)Record.readInt()) {
case CXXRecNotTemplate:
// Merged when we merge the folding set entry in the primary template.
if (!isa<ClassTemplateSpecializationDecl>(D))
mergeRedeclarable(D, Redecl);
break;
case CXXRecTemplate: {
// Merged when we merge the template.
ClassTemplateDecl *Template = ReadDeclAs<ClassTemplateDecl>();
D->TemplateOrInstantiation = Template;
if (!Template->getTemplatedDecl()) {
// We've not actually loaded the ClassTemplateDecl yet, because we're
// currently being loaded as its pattern. Rely on it to set up our
// TypeForDecl (see VisitClassTemplateDecl).
//
// Beware: we do not yet know our canonical declaration, and may still
// get merged once the surrounding class template has got off the ground.
TypeIDForTypeDecl = 0;
}
break;
}
case CXXRecMemberSpecialization: {
CXXRecordDecl *RD = ReadDeclAs<CXXRecordDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK);
MSI->setPointOfInstantiation(POI);
D->TemplateOrInstantiation = MSI;
mergeRedeclarable(D, Redecl);
break;
}
}
bool WasDefinition = Record.readInt();
if (WasDefinition)
ReadCXXRecordDefinition(D, /*Update*/false);
else
// Propagate DefinitionData pointer from the canonical declaration.
D->DefinitionData = D->getCanonicalDecl()->DefinitionData;
// Lazily load the key function to avoid deserializing every method so we can
// compute it.
if (WasDefinition) {
DeclID KeyFn = ReadDeclID();
if (KeyFn && D->IsCompleteDefinition)
// FIXME: This is wrong for the ARM ABI, where some other module may have
// made this function no longer be a key function. We need an update
// record or similar for that case.
C.KeyFunctions[D] = KeyFn;
}
return Redecl;
}
void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
VisitFunctionDecl(D);
}
void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) {
VisitFunctionDecl(D);
unsigned NumOverridenMethods = Record.readInt();
if (D->isCanonicalDecl()) {
while (NumOverridenMethods--) {
// Avoid the invariant checking of CXXMethodDecl::addOverriddenMethod,
// as MD may still be initializing.
if (CXXMethodDecl *MD = ReadDeclAs<CXXMethodDecl>())
Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl());
}
} else {
// We don't care about which declarations this used to override; we get
// the relevant information from the canonical declaration.
Record.skipInts(NumOverridenMethods);
}
}
void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
// We need the inherited constructor information to merge the declaration,
// so we have to read it before we call VisitCXXMethodDecl.
if (D->isInheritingConstructor()) {
auto *Shadow = ReadDeclAs<ConstructorUsingShadowDecl>();
auto *Ctor = ReadDeclAs<CXXConstructorDecl>();
*D->getTrailingObjects<InheritedConstructor>() =
InheritedConstructor(Shadow, Ctor);
}
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
VisitCXXMethodDecl(D);
if (auto *OperatorDelete = ReadDeclAs<FunctionDecl>()) {
auto *Canon = cast<CXXDestructorDecl>(D->getCanonicalDecl());
// FIXME: Check consistency if we have an old and new operator delete.
if (!Canon->OperatorDelete)
Canon->OperatorDelete = OperatorDelete;
}
}
void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) {
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitImportDecl(ImportDecl *D) {
VisitDecl(D);
D->ImportedAndComplete.setPointer(readModule());
D->ImportedAndComplete.setInt(Record.readInt());
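// The count of stored source locations sits at the end of the record;
// peek at it with Record.back() and skip over it once the loop is done.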
SourceLocation *StoredLocs = D->getTrailingObjects<SourceLocation>();
for (unsigned I = 0, N = Record.back(); I != N; ++I)
StoredLocs[I] = ReadSourceLocation();
Record.skipInts(1); // The number of stored source locations.
}
void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) {
VisitDecl(D);
D->setColonLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitFriendDecl(FriendDecl *D) {
VisitDecl(D);
if (Record.readInt()) // hasFriendDecl
D->Friend = ReadDeclAs<NamedDecl>();
else
D->Friend = GetTypeSourceInfo();
for (unsigned i = 0; i != D->NumTPLists; ++i)
D->getTrailingObjects<TemplateParameterList *>()[i] =
Record.readTemplateParameterList();
D->NextFriend = ReadDeclID();
D->UnsupportedFriend = (Record.readInt() != 0);
D->FriendLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
VisitDecl(D);
unsigned NumParams = Record.readInt();
D->NumParams = NumParams;
D->Params = new TemplateParameterList*[NumParams];
for (unsigned i = 0; i != NumParams; ++i)
D->Params[i] = Record.readTemplateParameterList();
if (Record.readInt()) // HasFriendDecl
D->Friend = ReadDeclAs<NamedDecl>();
else
D->Friend = GetTypeSourceInfo();
D->FriendLoc = ReadSourceLocation();
}
DeclID ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) {
VisitNamedDecl(D);
DeclID PatternID = ReadDeclID();
NamedDecl *TemplatedDecl = cast_or_null<NamedDecl>(Reader.GetDecl(PatternID));
TemplateParameterList *TemplateParams = Record.readTemplateParameterList();
// FIXME handle associated constraints
D->init(TemplatedDecl, TemplateParams);
return PatternID;
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
// Make sure we've allocated the Common pointer first. We do this before
// VisitTemplateDecl so that getCommonPtr() can be used during initialization.
RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl();
if (!CanonD->Common) {
CanonD->Common = CanonD->newCommon(Reader.getContext());
Reader.PendingDefinitions.insert(CanonD);
}
D->Common = CanonD->Common;
// If this is the first declaration of the template, fill in the information
// for the 'common' pointer.
if (ThisDeclID == Redecl.getFirstID()) {
if (RedeclarableTemplateDecl *RTD
= ReadDeclAs<RedeclarableTemplateDecl>()) {
assert(RTD->getKind() == D->getKind() &&
"InstantiatedFromMemberTemplate kind mismatch");
D->setInstantiatedFromMemberTemplate(RTD);
if (Record.readInt())
D->setMemberSpecialization();
}
}
DeclID PatternID = VisitTemplateDecl(D);
D->IdentifierNamespace = Record.readInt();
mergeRedeclarable(D, Redecl, PatternID);
// If we merged the template with a prior declaration chain, merge the common
// pointer.
// FIXME: Actually merge here, don't just overwrite.
D->Common = D->getCanonicalDecl()->Common;
return Redecl;
}
void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
if (D->getTemplatedDecl()->TemplateOrInstantiation) {
// We were loaded before our templated declaration was. We've not set up
// its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct
// it now.
Reader.getContext().getInjectedClassNameType(
D->getTemplatedDecl(), D->getInjectedClassNameSpecialization());
}
}
void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) {
llvm_unreachable("BuiltinTemplates are not serialized");
}
/// TODO: Unify with ClassTemplateDecl version?
/// May require unifying ClassTemplateDecl and
/// VarTemplateDecl beyond TemplateDecl...
void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This VarTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = ReadDecl()) {
if (ClassTemplateDecl *CTD = dyn_cast<ClassTemplateDecl>(InstD)) {
D->SpecializedTemplate = CTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
ClassTemplateSpecializationDecl::SpecializedPartialSpecialization *PS
= new (C) ClassTemplateSpecializationDecl::
SpecializedPartialSpecialization();
PS->PartialSpecialization
= cast<ClassTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
ClassTemplateDecl *CanonPattern = ReadDeclAs<ClassTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// Set this as, or find, the canonical declaration for this specialization
ClassTemplateSpecializationDecl *CanonSpec;
if (ClassTemplatePartialSpecializationDecl *Partial =
dyn_cast<ClassTemplatePartialSpecializationDecl>(D)) {
CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonSpec =
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
// If there was already a canonical specialization, merge into it.
if (CanonSpec != D) {
mergeRedeclarable<TagDecl>(D, CanonSpec, Redecl);
// This declaration might be a definition. Merge with any existing
// definition.
if (auto *DDD = D->DefinitionData) {
if (CanonSpec->DefinitionData)
MergeDefinitionData(CanonSpec, std::move(*DDD));
else
CanonSpec->DefinitionData = D->DefinitionData;
}
D->DefinitionData = CanonSpec->DefinitionData;
}
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = GetTypeSourceInfo()) {
ClassTemplateSpecializationDecl::ExplicitSpecializationInfo *ExplicitInfo
= new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = ReadSourceLocation();
ExplicitInfo->TemplateKeywordLoc = ReadSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
return Redecl;
}
void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D);
D->TemplateParams = Record.readTemplateParameterList();
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
ReadDeclAs<ClassTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D) {
VisitDecl(D);
D->Specialization = ReadDeclAs<CXXMethodDecl>();
}
void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This FunctionTemplateDecl owns a CommonPtr; read it.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
/// TODO: Unify with ClassTemplateSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
VarTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitVarDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = ReadDecl()) {
if (VarTemplateDecl *VTD = dyn_cast<VarTemplateDecl>(InstD)) {
D->SpecializedTemplate = VTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(
C, TemplArgs);
VarTemplateSpecializationDecl::SpecializedPartialSpecialization *PS =
new (C)
VarTemplateSpecializationDecl::SpecializedPartialSpecialization();
PS->PartialSpecialization =
cast<VarTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = GetTypeSourceInfo()) {
VarTemplateSpecializationDecl::ExplicitSpecializationInfo *ExplicitInfo =
new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = ReadSourceLocation();
ExplicitInfo->TemplateKeywordLoc = ReadSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
VarTemplateDecl *CanonPattern = ReadDeclAs<VarTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// FIXME: If it's already present, merge it.
if (VarTemplatePartialSpecializationDecl *Partial =
dyn_cast<VarTemplatePartialSpecializationDecl>(D)) {
CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
}
}
return Redecl;
}
/// TODO: Unify with ClassTemplatePartialSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D);
D->TemplateParams = Record.readTemplateParameterList();
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
ReadDeclAs<VarTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
VisitTypeDecl(D);
D->setDeclaredWithTypename(Record.readInt());
if (Record.readInt())
D->setDefaultArgument(GetTypeSourceInfo());
}
void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
VisitDeclaratorDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->isExpandedParameterPack()) {
auto TypesAndInfos =
D->getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
new (&TypesAndInfos[I].first) QualType(Record.readType());
TypesAndInfos[I].second = GetTypeSourceInfo();
}
} else {
// Rest of NonTypeTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Record.readExpr());
}
}
void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
VisitTemplateDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->isExpandedParameterPack()) {
TemplateParameterList **Data =
D->getTrailingObjects<TemplateParameterList *>();
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I)
Data[I] = Record.readTemplateParameterList();
} else {
// Rest of TemplateTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Reader.getContext(),
Record.readTemplateArgumentLoc());
}
}
void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
VisitRedeclarableTemplateDecl(D);
}
void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) {
VisitDecl(D);
D->AssertExprAndFailed.setPointer(Record.readExpr());
D->AssertExprAndFailed.setInt(Record.readInt());
D->Message = cast_or_null<StringLiteral>(Record.readExpr());
D->RParenLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) {
VisitDecl(D);
}
std::pair<uint64_t, uint64_t>
ASTDeclReader::VisitDeclContext(DeclContext *DC) {
uint64_t LexicalOffset = ReadLocalOffset();
uint64_t VisibleOffset = ReadLocalOffset();
return std::make_pair(LexicalOffset, VisibleOffset);
}
template <typename T>
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarable(Redeclarable<T> *D) {
DeclID FirstDeclID = ReadDeclID();
Decl *MergeWith = nullptr;
bool IsKeyDecl = ThisDeclID == FirstDeclID;
bool IsFirstLocalDecl = false;
uint64_t RedeclOffset = 0;
// A FirstDeclID of 0 indicates that this declaration was the only declaration
// of its entity; it is encoded that way as a space optimization.
if (FirstDeclID == 0) {
FirstDeclID = ThisDeclID;
IsKeyDecl = true;
IsFirstLocalDecl = true;
} else if (unsigned N = Record.readInt()) {
// This declaration was the first local declaration, but might have been
// preceded by imported declarations in its redeclaration chain.
IsKeyDecl = N == 1;
IsFirstLocalDecl = true;
// We have some declarations that must be before us in our redeclaration
// chain. Read them now, and remember that we ought to merge with one of
// them.
// FIXME: Provide a known merge target to the second and subsequent such
// declaration.
for (unsigned I = 0; I != N - 1; ++I)
MergeWith = ReadDecl();
RedeclOffset = ReadLocalOffset();
} else {
// This declaration was not the first local declaration. Read the first
// local declaration now, to trigger the import of other redeclarations.
(void)ReadDecl();
}
T *FirstDecl = cast_or_null<T>(Reader.GetDecl(FirstDeclID));
if (FirstDecl != D) {
// We delay loading of the redeclaration chain to avoid deeply nested calls.
// We temporarily set the first (canonical) declaration as the previous one,
// which is the one that matters, and mark the real previous DeclID to be
// loaded and attached later on.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(FirstDecl);
D->First = FirstDecl->getCanonicalDecl();
}
T *DAsT = static_cast<T*>(D);
// Note that we need to load local redeclarations of this decl and build a
// decl chain for them. This must happen *after* we perform the preloading
// above; this ensures that the redeclaration chain is built in the correct
// order.
if (IsFirstLocalDecl)
Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset));
return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl);
}
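// The three encodings of the leading redeclaration field handled above, as an
// inferred sketch (not a normative grammar):
//
//   [0]                                   sole declaration of its entity
//   [FirstID][N][Decl_1..Decl_{N-1}][Off] first local declaration; the N-1
//                                         preceding imported declarations are
//                                         read as merge candidates, and Off is
//                                         the local redeclaration-chain offset
//   [FirstID][0][FirstLocalRef]           not the first local declaration;
//                                         reading FirstLocalRef triggers the
//                                         import of the rest of the chain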
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// If we're not the canonical declaration, we don't need to merge.
if (!DBase->isFirstDecl())
return;
T *D = static_cast<T*>(DBase);
if (auto *Existing = Redecl.getKnownMergeTarget())
// We already know of an existing declaration we should merge with.
mergeRedeclarable(D, cast<T>(Existing), Redecl, TemplatePatternID);
else if (FindExistingResult ExistingRes = findExisting(D))
if (T *Existing = ExistingRes)
mergeRedeclarable(D, Existing, Redecl, TemplatePatternID);
}
/// \brief "Cast" to type T, asserting if we don't have an implicit conversion.
/// We use this to put code in a template that will only be valid for certain
/// instantiations.
template<typename T> static T assert_cast(T t) { return t; }
template<typename T> static T assert_cast(...) {
llvm_unreachable("bad assert_cast");
}
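// Illustrative use of assert_cast, matching its call sites in
// mergeRedeclarable below: for T = NamespaceDecl the pointer conversion
// exists, so the identity overload is selected at compile time; for any other
// T the variadic overload is chosen instead, and that branch is dynamically
// dead because of the dyn_cast guard:
//
//   if (auto *Namespace = dyn_cast<NamespaceDecl>(D))
//     ... assert_cast<NamespaceDecl*>(ExistingCanon) ...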
/// \brief Merge together the pattern declarations from two template
/// declarations.
void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl) {
auto *DPattern = D->getTemplatedDecl();
auto *ExistingPattern = Existing->getTemplatedDecl();
RedeclarableResult Result(/*MergeWith*/ ExistingPattern,
DPattern->getCanonicalDecl()->getGlobalID(),
IsKeyDecl);
if (auto *DClass = dyn_cast<CXXRecordDecl>(DPattern)) {
// Merge with any existing definition.
// FIXME: This is duplicated in several places. Refactor.
auto *ExistingClass =
cast<CXXRecordDecl>(ExistingPattern)->getCanonicalDecl();
if (auto *DDD = DClass->DefinitionData) {
if (ExistingClass->DefinitionData) {
MergeDefinitionData(ExistingClass, std::move(*DDD));
} else {
ExistingClass->DefinitionData = DClass->DefinitionData;
// We may have skipped this before because we thought that DClass
// was the canonical declaration.
Reader.PendingDefinitions.insert(DClass);
}
}
DClass->DefinitionData = ExistingClass->DefinitionData;
return mergeRedeclarable(DClass, cast<TagDecl>(ExistingPattern),
Result);
}
if (auto *DFunction = dyn_cast<FunctionDecl>(DPattern))
return mergeRedeclarable(DFunction, cast<FunctionDecl>(ExistingPattern),
Result);
if (auto *DVar = dyn_cast<VarDecl>(DPattern))
return mergeRedeclarable(DVar, cast<VarDecl>(ExistingPattern), Result);
if (auto *DAlias = dyn_cast<TypeAliasDecl>(DPattern))
return mergeRedeclarable(DAlias, cast<TypedefNameDecl>(ExistingPattern),
Result);
llvm_unreachable("merged an unknown kind of redeclarable template");
}
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
T *D = static_cast<T*>(DBase);
T *ExistingCanon = Existing->getCanonicalDecl();
T *DCanon = D->getCanonicalDecl();
if (ExistingCanon != DCanon) {
assert(DCanon->getGlobalID() == Redecl.getFirstID() &&
"already merged this declaration");
// Have our redeclaration link point back at the canonical declaration
// of the existing declaration, so that this declaration has the
// appropriate canonical declaration.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(ExistingCanon);
D->First = ExistingCanon;
ExistingCanon->Used |= D->Used;
D->Used = false;
// When we merge a namespace, update its pointer to the first namespace.
// We cannot have loaded any redeclarations of this declaration yet, so
// there's nothing else that needs to be updated.
if (auto *Namespace = dyn_cast<NamespaceDecl>(D))
Namespace->AnonOrFirstNamespaceAndInline.setPointer(
assert_cast<NamespaceDecl*>(ExistingCanon));
// When we merge a template, merge its pattern.
if (auto *DTemplate = dyn_cast<RedeclarableTemplateDecl>(D))
mergeTemplatePattern(
DTemplate, assert_cast<RedeclarableTemplateDecl*>(ExistingCanon),
TemplatePatternID, Redecl.isKeyDecl());
// If this declaration is a key declaration, make a note of that.
if (Redecl.isKeyDecl())
Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID());
}
}
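// A sketch of the rewiring performed above: before merging, D heads its own
// chain; afterwards both chains share one canonical declaration, so
// getCanonicalDecl() on either chain yields ExistingCanon:
//
//   D->RedeclLink --> ExistingCanon (previous)
//   D->First      =   ExistingCanon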
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity, for the case where the entity is not actually
/// redeclarable. This happens, for instance, when merging the fields of
/// identical class definitions from two different modules.
template<typename T>
void ASTDeclReader::mergeMergeable(Mergeable<T> *D) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// ODR-based merging is only performed in C++. In C, identically-named things
// in different translation units are not redeclarations (but may still have
// compatible types).
if (!Reader.getContext().getLangOpts().CPlusPlus)
return;
if (FindExistingResult ExistingRes = findExisting(static_cast<T*>(D)))
if (T *Existing = ExistingRes)
Reader.getContext().setPrimaryMergedDecl(static_cast<T *>(D),
Existing->getCanonicalDecl());
}
void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
VisitDecl(D);
unsigned NumVars = D->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i) {
Vars.push_back(Record.readExpr());
}
D->setVars(Vars);
}
void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
VisitValueDecl(D);
D->setLocation(ReadSourceLocation());
D->setCombiner(Record.readExpr());
D->setInitializer(Record.readExpr());
D->PrevDeclInScope = ReadDeclID();
}
void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
VisitVarDecl(D);
}
//===----------------------------------------------------------------------===//
// Attribute Reading
//===----------------------------------------------------------------------===//
/// \brief Reads attributes from the current stream position.
void ASTReader::ReadAttributes(ASTRecordReader &Record, AttrVec &Attrs) {
for (unsigned i = 0, e = Record.readInt(); i != e; ++i) {
Attr *New = nullptr;
attr::Kind Kind = (attr::Kind)Record.readInt();
SourceRange Range = Record.readSourceRange();
ASTContext &Context = getContext();
#include "clang/Serialization/AttrPCHRead.inc"
assert(New && "Unable to decode attribute?");
Attrs.push_back(New);
}
}
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
/// \brief Note that we have loaded the declaration with the given
/// Index.
///
/// This routine notes that this declaration has already been loaded,
/// so that future GetDecl calls will return this declaration rather
/// than trying to load a new declaration.
inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) {
assert(!DeclsLoaded[Index] && "Decl loaded twice?");
DeclsLoaded[Index] = D;
}
/// \brief Determine whether the consumer will be interested in seeing
/// this declaration (via HandleTopLevelDecl).
///
/// This routine should return true for anything that might affect
/// code generation, e.g., inline function definitions, Objective-C
/// declarations with metadata, etc.
static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) {
// An ObjCMethodDecl is never considered "interesting" because its
// implementation container always is.
// An ImportDecl or VarDecl imported from a module will get emitted when
// we import the relevant module.
if ((isa<ImportDecl>(D) || isa<VarDecl>(D)) && D->getImportedOwningModule() &&
Ctx.DeclMustBeEmitted(D))
return false;
if (isa<FileScopeAsmDecl>(D) ||
isa<ObjCProtocolDecl>(D) ||
isa<ObjCImplDecl>(D) ||
isa<ImportDecl>(D) ||
isa<PragmaCommentDecl>(D) ||
isa<PragmaDetectMismatchDecl>(D))
return true;
if (isa<OMPThreadPrivateDecl>(D) || isa<OMPDeclareReductionDecl>(D))
return !D->getDeclContext()->isFunctionOrMethod();
if (VarDecl *Var = dyn_cast<VarDecl>(D))
return Var->isFileVarDecl() &&
Var->isThisDeclarationADefinition() == VarDecl::Definition;
if (FunctionDecl *Func = dyn_cast<FunctionDecl>(D))
return Func->doesThisDeclarationHaveABody() || HasBody;
if (auto *ES = D->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
return true;
return false;
}
/// \brief Get the correct cursor and offset for loading a declaration.
ASTReader::RecordLocation
ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *M = I->second;
const DeclOffset &DOffs =
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
return RecordLocation(M, DOffs.BitOffset);
}
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
ContinuousRangeMap<uint64_t, ModuleFile*, 4>::iterator I
= GlobalBitOffsetsMap.find(GlobalOffset);
assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map");
return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset);
}
uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint32_t LocalOffset) {
return LocalOffset + M.GlobalBitOffset;
}
static bool isSameTemplateParameterList(const TemplateParameterList *X,
const TemplateParameterList *Y);
/// \brief Determine whether two template parameters are similar enough
/// that they may be used in declarations of the same template.
static bool isSameTemplateParameter(const NamedDecl *X,
const NamedDecl *Y) {
if (X->getKind() != Y->getKind())
return false;
if (const TemplateTypeParmDecl *TX = dyn_cast<TemplateTypeParmDecl>(X)) {
const TemplateTypeParmDecl *TY = cast<TemplateTypeParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack();
}
if (const NonTypeTemplateParmDecl *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
const NonTypeTemplateParmDecl *TY = cast<NonTypeTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
TX->getASTContext().hasSameType(TX->getType(), TY->getType());
}
const TemplateTemplateParmDecl *TX = cast<TemplateTemplateParmDecl>(X);
const TemplateTemplateParmDecl *TY = cast<TemplateTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
isSameTemplateParameterList(TX->getTemplateParameters(),
TY->getTemplateParameters());
}
static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) {
if (auto *NS = X->getAsNamespace())
return NS;
if (auto *NAS = X->getAsNamespaceAlias())
return NAS->getNamespace();
return nullptr;
}
static bool isSameQualifier(const NestedNameSpecifier *X,
const NestedNameSpecifier *Y) {
if (auto *NSX = getNamespace(X)) {
auto *NSY = getNamespace(Y);
if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl())
return false;
} else if (X->getKind() != Y->getKind())
return false;
// FIXME: For namespaces and types, we're permitted to check that the entity
// is named via the same tokens. We should probably do so.
switch (X->getKind()) {
case NestedNameSpecifier::Identifier:
if (X->getAsIdentifier() != Y->getAsIdentifier())
return false;
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
// We've already checked that we named the same namespace.
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
if (X->getAsType()->getCanonicalTypeInternal() !=
Y->getAsType()->getCanonicalTypeInternal())
return false;
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return true;
}
// Recurse into earlier portion of NNS, if any.
auto *PX = X->getPrefix();
auto *PY = Y->getPrefix();
if (PX && PY)
return isSameQualifier(PX, PY);
return !PX && !PY;
}
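// Example of the namespace/alias equivalence handled above (hypothetical
// code):
//
//   namespace N { struct S; }
//   namespace M = N;
//
// The qualifiers 'N::' and 'M::' have different kinds (Namespace vs.
// NamespaceAlias) but name the same namespace, so getNamespace() lets them
// compare equal before the prefix recursion.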
/// \brief Determine whether two template parameter lists are similar enough
/// that they may be used in declarations of the same template.
static bool isSameTemplateParameterList(const TemplateParameterList *X,
const TemplateParameterList *Y) {
if (X->size() != Y->size())
return false;
for (unsigned I = 0, N = X->size(); I != N; ++I)
if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I)))
return false;
return true;
}
/// Determine whether the attributes we can overload on are identical for A and
/// B. Will ignore any overloadable attrs represented in the type of A and B.
static bool hasSameOverloadableAttrs(const FunctionDecl *A,
const FunctionDecl *B) {
// Note that pass_object_size attributes are represented in the function's
// ExtParameterInfo, so we don't need to check them here.
SmallVector<const EnableIfAttr *, 4> AEnableIfs;
// Since this is an equality check, we can ignore that enable_if attrs show up
// in reverse order.
for (const auto *EIA : A->specific_attrs<EnableIfAttr>())
AEnableIfs.push_back(EIA);
SmallVector<const EnableIfAttr *, 4> BEnableIfs;
for (const auto *EIA : B->specific_attrs<EnableIfAttr>())
BEnableIfs.push_back(EIA);
// Two very common cases: either we have 0 enable_if attrs, or we have an
// unequal number of enable_if attrs.
if (AEnableIfs.empty() && BEnableIfs.empty())
return true;
if (AEnableIfs.size() != BEnableIfs.size())
return false;
llvm::FoldingSetNodeID Cand1ID, Cand2ID;
for (unsigned I = 0, E = AEnableIfs.size(); I != E; ++I) {
Cand1ID.clear();
Cand2ID.clear();
AEnableIfs[I]->getCond()->Profile(Cand1ID, A->getASTContext(), true);
BEnableIfs[I]->getCond()->Profile(Cand2ID, B->getASTContext(), true);
if (Cand1ID != Cand2ID)
return false;
}
return true;
}
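// A sketch of what the pairwise Profile comparison above distinguishes, with
// hypothetical declarations:
//
//   void f(int x) __attribute__((enable_if(x > 0, "")));  // profile P1
//   void f(int x) __attribute__((enable_if(x < 0, "")));  // profile P2
//
// Two functions can match only if their enable_if conditions produce
// identical FoldingSetNodeIDs attribute by attribute, so P1 and P2 differ.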
/// \brief Determine whether the two declarations refer to the same entity.
static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
assert(X->getDeclName() == Y->getDeclName() && "Declaration name mismatch!");
if (X == Y)
return true;
// Must be in the same context.
if (!X->getDeclContext()->getRedeclContext()->Equals(
Y->getDeclContext()->getRedeclContext()))
return false;
// Two typedefs refer to the same entity if they have the same underlying
// type.
if (TypedefNameDecl *TypedefX = dyn_cast<TypedefNameDecl>(X))
if (TypedefNameDecl *TypedefY = dyn_cast<TypedefNameDecl>(Y))
return X->getASTContext().hasSameType(TypedefX->getUnderlyingType(),
TypedefY->getUnderlyingType());
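// Note that this check runs before the kind comparison below, so a
// 'typedef int T;' in one module and a 'using T = int;' in another are the
// same entity: both are TypedefNameDecls with the same underlying type.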
// Must have the same kind.
if (X->getKind() != Y->getKind())
return false;
// Objective-C classes and protocols with the same name always match.
if (isa<ObjCInterfaceDecl>(X) || isa<ObjCProtocolDecl>(X))
return true;
if (isa<ClassTemplateSpecializationDecl>(X)) {
// No need to handle these here: we merge them when adding them to the
// template.
return false;
}
// Compatible tags match.
if (TagDecl *TagX = dyn_cast<TagDecl>(X)) {
TagDecl *TagY = cast<TagDecl>(Y);
return (TagX->getTagKind() == TagY->getTagKind()) ||
((TagX->getTagKind() == TTK_Struct || TagX->getTagKind() == TTK_Class ||
TagX->getTagKind() == TTK_Interface) &&
(TagY->getTagKind() == TTK_Struct || TagY->getTagKind() == TTK_Class ||
TagY->getTagKind() == TTK_Interface));
}
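// For instance, a 'struct S;' from one module and a 'class S;' from another
// are treated as the same entity here, since struct, class, and __interface
// are interchangeable tag kinds for this purpose.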
// Functions with the same type and linkage match.
// FIXME: This needs to cope with merging of prototyped/non-prototyped
// functions, etc.
if (FunctionDecl *FuncX = dyn_cast<FunctionDecl>(X)) {
FunctionDecl *FuncY = cast<FunctionDecl>(Y);
if (CXXConstructorDecl *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
CXXConstructorDecl *CtorY = cast<CXXConstructorDecl>(Y);
if (CtorX->getInheritedConstructor() &&
!isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
CtorY->getInheritedConstructor().getConstructor()))
return false;
}
ASTContext &C = FuncX->getASTContext();
if (!C.hasSameType(FuncX->getType(), FuncY->getType())) {
// We can get functions with different types on the redecl chain in C++17
// if they have differing exception specifications and at least one of
// the exception specs is unresolved.
// FIXME: Do we need to check for C++14 deduced return types here too?
auto *XFPT = FuncX->getType()->getAs<FunctionProtoType>();
auto *YFPT = FuncY->getType()->getAs<FunctionProtoType>();
if (C.getLangOpts().CPlusPlus1z && XFPT && YFPT &&
(isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
C.hasSameFunctionTypeIgnoringExceptionSpec(FuncX->getType(),
FuncY->getType()))
return true;
return false;
}
return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() &&
hasSameOverloadableAttrs(FuncX, FuncY);
}
// Variables with the same type and linkage match.
if (VarDecl *VarX = dyn_cast<VarDecl>(X)) {
VarDecl *VarY = cast<VarDecl>(Y);
if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
ASTContext &C = VarX->getASTContext();
if (C.hasSameType(VarX->getType(), VarY->getType()))
return true;
// We can get decls with different types on the redecl chain. E.g.:
// template <typename T> struct S { static T Var[]; }; // #1
// template <typename T> T S<T>::Var[sizeof(T)]; // #2
// This should only happen when completing an incomplete array type. In that
// case, when comparing #1 and #2, we should compare their element types.
const ArrayType *VarXTy = C.getAsArrayType(VarX->getType());
const ArrayType *VarYTy = C.getAsArrayType(VarY->getType());
if (!VarXTy || !VarYTy)
return false;
if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
return C.hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
}
return false;
}
// Namespaces with the same name and inlinedness match.
if (NamespaceDecl *NamespaceX = dyn_cast<NamespaceDecl>(X)) {
NamespaceDecl *NamespaceY = cast<NamespaceDecl>(Y);
return NamespaceX->isInline() == NamespaceY->isInline();
}
// Templates with the same name and kind match if their template parameter
// lists and patterns match.
if (TemplateDecl *TemplateX = dyn_cast<TemplateDecl>(X)) {
TemplateDecl *TemplateY = cast<TemplateDecl>(Y);
return isSameEntity(TemplateX->getTemplatedDecl(),
TemplateY->getTemplatedDecl()) &&
isSameTemplateParameterList(TemplateX->getTemplateParameters(),
TemplateY->getTemplateParameters());
}
// Fields with the same name and the same type match.
if (FieldDecl *FDX = dyn_cast<FieldDecl>(X)) {
FieldDecl *FDY = cast<FieldDecl>(Y);
// FIXME: Also check the bitwidth is odr-equivalent, if any.
return X->getASTContext().hasSameType(FDX->getType(), FDY->getType());
}
// Indirect fields with the same target field match.
if (auto *IFDX = dyn_cast<IndirectFieldDecl>(X)) {
auto *IFDY = cast<IndirectFieldDecl>(Y);
return IFDX->getAnonField()->getCanonicalDecl() ==
IFDY->getAnonField()->getCanonicalDecl();
}
// Enumerators with the same name match.
if (isa<EnumConstantDecl>(X))
// FIXME: Also check the value is odr-equivalent.
return true;
// Using shadow declarations with the same target match.
if (UsingShadowDecl *USX = dyn_cast<UsingShadowDecl>(X)) {
UsingShadowDecl *USY = cast<UsingShadowDecl>(Y);
return USX->getTargetDecl() == USY->getTargetDecl();
}
// Using declarations with the same qualifier match. (We already know that
// the name matches.)
if (auto *UX = dyn_cast<UsingDecl>(X)) {
auto *UY = cast<UsingDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->hasTypename() == UY->hasTypename() &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (auto *UX = dyn_cast<UnresolvedUsingValueDecl>(X)) {
auto *UY = cast<UnresolvedUsingValueDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (auto *UX = dyn_cast<UnresolvedUsingTypenameDecl>(X))
return isSameQualifier(
UX->getQualifier(),
cast<UnresolvedUsingTypenameDecl>(Y)->getQualifier());
// Namespace alias definitions with the same target match.
if (auto *NAX = dyn_cast<NamespaceAliasDecl>(X)) {
auto *NAY = cast<NamespaceAliasDecl>(Y);
return NAX->getNamespace()->Equals(NAY->getNamespace());
}
return false;
}
/// Find the context in which we should search for previous declarations when
/// looking for declarations to merge.
DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC) {
if (NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC))
return ND->getOriginalNamespace();
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) {
// Try to dig out the definition.
auto *DD = RD->DefinitionData;
if (!DD)
DD = RD->getCanonicalDecl()->DefinitionData;
// If there's no definition yet, then DC's definition is added by an update
// record, but we've not yet loaded that update record. In this case, we
// commit to DC being the canonical definition now, and will fix this when
// we load the update record.
if (!DD) {
DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD);
RD->IsCompleteDefinition = true;
RD->DefinitionData = DD;
RD->getCanonicalDecl()->DefinitionData = DD;
// Track that we did this horrible thing so that we can fix it later.
Reader.PendingFakeDefinitionData.insert(
std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake));
}
return DD->Definition;
}
if (EnumDecl *ED = dyn_cast<EnumDecl>(DC))
return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition()
: nullptr;
// We can see the TU here only if we have no Sema object. In that case,
// there's no TU scope to look in, so using the DC alone is sufficient.
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
return TU;
return nullptr;
}
ASTDeclReader::FindExistingResult::~FindExistingResult() {
// Record that we had a typedef name for linkage whether or not we merge
// with that declaration.
if (TypedefNameForLinkage) {
DeclContext *DC = New->getDeclContext()->getRedeclContext();
Reader.ImportedTypedefNamesForLinkage.insert(
std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New));
return;
}
if (!AddResult || Existing)
return;
DeclarationName Name = New->getDeclName();
DeclContext *DC = New->getDeclContext()->getRedeclContext();
if (needsAnonymousDeclarationNumber(New)) {
setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(),
AnonymousDeclNumber, New);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name))
Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()]
.push_back(New);
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
// Add the declaration to its redeclaration context so later merging
// lookups will find it.
MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true);
}
}
/// Find the declaration that should be merged into, given the declaration found
/// by name lookup. If we're merging an anonymous declaration within a typedef,
/// we need a matching typedef, and we merge with the type inside it.
static NamedDecl *getDeclForMerging(NamedDecl *Found,
bool IsTypedefNameForLinkage) {
if (!IsTypedefNameForLinkage)
return Found;
// If we found a typedef declaration that gives a name to some other
// declaration, then we want that inner declaration. Declarations from
// AST files are handled via ImportedTypedefNamesForLinkage.
if (Found->isFromASTFile())
return nullptr;
if (auto *TND = dyn_cast<TypedefNameDecl>(Found))
return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true);
return nullptr;
}
NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index) {
// If the lexical context has been merged, look into the now-canonical
// definition.
if (auto *Merged = Reader.MergedDeclContexts.lookup(DC))
DC = Merged;
// If we've seen this before, return the canonical declaration.
auto &Previous = Reader.AnonymousDeclarationsForMerging[DC];
if (Index < Previous.size() && Previous[Index])
return Previous[Index];
// If this is the first time, but we have parsed a declaration of the context,
// build the anonymous declaration list from the parsed declaration.
if (!cast<Decl>(DC)->isFromASTFile()) {
numberAnonymousDeclsWithin(DC, [&](NamedDecl *ND, unsigned Number) {
if (Previous.size() == Number)
Previous.push_back(cast<NamedDecl>(ND->getCanonicalDecl()));
else
Previous[Number] = cast<NamedDecl>(ND->getCanonicalDecl());
});
}
return Index < Previous.size() ? Previous[Index] : nullptr;
}
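// A sketch of the numbering this relies on (assumed from the lambda above,
// which receives consecutive Number values): anonymous declarations get
// indices in order of occurrence within their context, so the same index
// computed for two copies of one context identifies the same entity.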
void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC, unsigned Index,
NamedDecl *D) {
if (auto *Merged = Reader.MergedDeclContexts.lookup(DC))
DC = Merged;
auto &Previous = Reader.AnonymousDeclarationsForMerging[DC];
if (Index >= Previous.size())
Previous.resize(Index + 1);
if (!Previous[Index])
Previous[Index] = D;
}
ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage
: D->getDeclName();
if (!Name && !needsAnonymousDeclarationNumber(D)) {
// Don't bother trying to find unnamed declarations that are in
// unmergeable contexts.
FindExistingResult Result(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
Result.suppress();
return Result;
}
DeclContext *DC = D->getDeclContext()->getRedeclContext();
if (TypedefNameForLinkage) {
auto It = Reader.ImportedTypedefNamesForLinkage.find(
std::make_pair(DC, TypedefNameForLinkage));
if (It != Reader.ImportedTypedefNamesForLinkage.end())
if (isSameEntity(It->second, D))
return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber,
TypedefNameForLinkage);
// Go on to check in other places in case an existing typedef name
// was not imported.
}
if (needsAnonymousDeclarationNumber(D)) {
// This is an anonymous declaration that we may need to merge. Look it up
// in its context by number.
if (auto *Existing = getAnonymousDeclForMerging(
Reader, D->getLexicalDeclContext(), AnonymousDeclNumber))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
IdentifierResolver &IdResolver = Reader.getIdResolver();
// Temporarily consider the identifier to be up-to-date. We don't want to
// cause additional lookups here.
class UpToDateIdentifierRAII {
IdentifierInfo *II;
bool WasOutOfDate;
public:
explicit UpToDateIdentifierRAII(IdentifierInfo *II)
: II(II), WasOutOfDate(false)
{
if (II) {
WasOutOfDate = II->isOutOfDate();
if (WasOutOfDate)
II->setOutOfDate(false);
}
}
~UpToDateIdentifierRAII() {
if (WasOutOfDate)
II->setOutOfDate(true);
}
} UpToDate(Name.getAsIdentifierInfo());
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
DeclContext::lookup_result R = MergeDC->noload_lookup(Name);
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else {
// Not in a mergeable context.
return FindExistingResult(Reader);
}
// If this declaration is from a merged context, make a note that we need to
// check that the canonical definition of that context contains the decl.
//
// FIXME: We should do something similar if we merge two definitions of the
// same template specialization into the same CXXRecordDecl.
auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext());
if (MergedDCIt != Reader.MergedDeclContexts.end() &&
MergedDCIt->second == D->getDeclContext())
Reader.PendingOdrMergeChecks.push_back(D);
return FindExistingResult(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
}
template<typename DeclT>
Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable<DeclT> *D) {
return D->RedeclLink.getLatestNotUpdated();
}
Decl *ASTDeclReader::getMostRecentDeclImpl(...) {
llvm_unreachable("getMostRecentDecl on non-redeclarable declaration");
}
Decl *ASTDeclReader::getMostRecentDecl(Decl *D) {
assert(D);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
return getMostRecentDeclImpl(cast<TYPE##Decl>(D));
#include "clang/AST/DeclNodes.inc"
}
llvm_unreachable("unknown decl kind");
}
Decl *ASTReader::getMostRecentExistingDecl(Decl *D) {
return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl());
}
template<typename DeclT>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D,
Decl *Previous, Decl *Canon) {
D->RedeclLink.setPrevious(cast<DeclT>(Previous));
D->First = cast<DeclT>(Previous)->First;
}
namespace clang {
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<VarDecl> *D,
Decl *Previous, Decl *Canon) {
VarDecl *VD = static_cast<VarDecl*>(D);
VarDecl *PrevVD = cast<VarDecl>(Previous);
D->RedeclLink.setPrevious(PrevVD);
D->First = PrevVD->First;
// We should keep at most one definition on the chain.
// FIXME: Cache the definition once we've found it. Building a chain with
// N definitions currently takes O(N^2) time here.
if (VD->isThisDeclarationADefinition() == VarDecl::Definition) {
for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) {
if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) {
Reader.mergeDefinitionVisibility(CurD, VD);
VD->demoteThisDefinitionToDeclaration();
break;
}
}
}
}
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<FunctionDecl> *D,
Decl *Previous, Decl *Canon) {
FunctionDecl *FD = static_cast<FunctionDecl*>(D);
FunctionDecl *PrevFD = cast<FunctionDecl>(Previous);
FD->RedeclLink.setPrevious(PrevFD);
FD->First = PrevFD->First;
// If the previous declaration is an inline function declaration, then this
// declaration is too.
if (PrevFD->IsInline != FD->IsInline) {
// FIXME: [dcl.fct.spec]p4:
// If a function with external linkage is declared inline in one
// translation unit, it shall be declared inline in all translation
// units in which it appears.
//
// Be careful of this case:
//
// module A:
// template<typename T> struct X { void f(); };
// template<typename T> inline void X<T>::f() {}
//
// module B instantiates the declaration of X<int>::f
// module C instantiates the definition of X<int>::f
//
// If module B and C are merged, we do not have a violation of this rule.
FD->IsInline = true;
}
// If we need to propagate an exception specification along the redecl
// chain, make a note of that so that we can do so later.
auto *FPT = FD->getType()->getAs<FunctionProtoType>();
auto *PrevFPT = PrevFD->getType()->getAs<FunctionProtoType>();
if (FPT && PrevFPT) {
bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType());
bool WasUnresolved =
isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType());
if (IsUnresolved != WasUnresolved)
Reader.PendingExceptionSpecUpdates.insert(
std::make_pair(Canon, IsUnresolved ? PrevFD : FD));
}
}
} // end namespace clang
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) {
llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
}
/// Inherit the default template argument from \p From to \p To. Returns
/// \c false if there is no default template argument for \p From.
template <typename ParmDecl>
static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
Decl *ToD) {
auto *To = cast<ParmDecl>(ToD);
if (!From->hasDefaultArgument())
return false;
To->setInheritedDefaultArgument(Context, From);
return true;
}
static void inheritDefaultTemplateArguments(ASTContext &Context,
TemplateDecl *From,
TemplateDecl *To) {
auto *FromTP = From->getTemplateParameters();
auto *ToTP = To->getTemplateParameters();
assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");
for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
NamedDecl *FromParam = FromTP->getParam(N - I - 1);
if (FromParam->isParameterPack())
continue;
NamedDecl *ToParam = ToTP->getParam(N - I - 1);
if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam)) {
if (!inheritDefaultTemplateArgument(Context, FTTP, ToParam))
break;
} else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam)) {
if (!inheritDefaultTemplateArgument(Context, FNTTP, ToParam))
break;
} else {
if (!inheritDefaultTemplateArgument(
Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam))
break;
}
}
}
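// Example of the inheritance performed above (hypothetical modules):
//
//   template<typename T = int> struct X;  // module A (From)
//   template<typename T> struct X { };    // module B (To)
//
// B's parameter T inherits the '= int' default from A. The loop walks the
// parameters from the back and stops at the first one without a default,
// matching the rule that default template arguments must be trailing.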
void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
Decl *Previous, Decl *Canon) {
assert(D && Previous);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachPreviousDeclImpl(Reader, cast<TYPE##Decl>(D), Previous, Canon); \
break;
#include "clang/AST/DeclNodes.inc"
}
// If the declaration was visible in one module, a redeclaration of it in
// another module remains visible even if it wouldn't be visible by itself.
//
// FIXME: In this case, the declaration should only be visible if a module
// that makes it visible has been imported.
D->IdentifierNamespace |=
Previous->IdentifierNamespace &
(Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type);
// If the declaration declares a template, it may inherit default arguments
// from the previous declaration.
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
inheritDefaultTemplateArguments(Reader.getContext(),
cast<TemplateDecl>(Previous), TD);
}
template<typename DeclT>
void ASTDeclReader::attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest) {
D->RedeclLink.setLatest(cast<DeclT>(Latest));
}
void ASTDeclReader::attachLatestDeclImpl(...) {
llvm_unreachable("attachLatestDecl on non-redeclarable declaration");
}
void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) {
assert(D && Latest);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachLatestDeclImpl(cast<TYPE##Decl>(D), Latest); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
template<typename DeclT>
void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable<DeclT> *D) {
D->RedeclLink.markIncomplete();
}
void ASTDeclReader::markIncompleteDeclChainImpl(...) {
llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration");
}
void ASTReader::markIncompleteDeclChain(Decl *D) {
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
ASTDeclReader::markIncompleteDeclChainImpl(cast<TYPE##Decl>(D)); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
/// \brief Read the declaration at the given offset from the AST file.
Decl *ASTReader::ReadDeclRecord(DeclID ID) {
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
SourceLocation DeclLoc;
RecordLocation Loc = DeclCursorForID(ID, DeclLoc);
llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this declaration.
SavedStreamPosition SavedPosition(DeclsCursor);
ReadingKindTracker ReadingKind(Read_Decl, *this);
// Note that we are loading a declaration record.
Deserializing ADecl(this);
DeclsCursor.JumpToBit(Loc.Offset);
ASTRecordReader Record(*this, *Loc.F);
ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc);
unsigned Code = DeclsCursor.ReadCode();
ASTContext &Context = getContext();
Decl *D = nullptr;
switch ((DeclCode)Record.readRecord(DeclsCursor, Code)) {
case DECL_CONTEXT_LEXICAL:
case DECL_CONTEXT_VISIBLE:
llvm_unreachable("Record cannot be de-serialized with ReadDeclRecord");
case DECL_TYPEDEF:
D = TypedefDecl::CreateDeserialized(Context, ID);
break;
case DECL_TYPEALIAS:
D = TypeAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM:
D = EnumDecl::CreateDeserialized(Context, ID);
break;
case DECL_RECORD:
D = RecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM_CONSTANT:
D = EnumConstantDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION:
D = FunctionDecl::CreateDeserialized(Context, ID);
break;
case DECL_LINKAGE_SPEC:
D = LinkageSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPORT:
D = ExportDecl::CreateDeserialized(Context, ID);
break;
case DECL_LABEL:
D = LabelDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE:
D = NamespaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE_ALIAS:
D = NamespaceAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING:
D = UsingDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_PACK:
D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_USING_SHADOW:
D = UsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_CONSTRUCTOR_USING_SHADOW:
D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_DIRECTIVE:
D = UsingDirectiveDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_VALUE:
D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_TYPENAME:
D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_RECORD:
D = CXXRecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_DEDUCTION_GUIDE:
D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_METHOD:
D = CXXMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONSTRUCTOR:
D = CXXConstructorDecl::CreateDeserialized(Context, ID, false);
break;
case DECL_CXX_INHERITED_CONSTRUCTOR:
D = CXXConstructorDecl::CreateDeserialized(Context, ID, true);
break;
case DECL_CXX_DESTRUCTOR:
D = CXXDestructorDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONVERSION:
D = CXXConversionDecl::CreateDeserialized(Context, ID);
break;
case DECL_ACCESS_SPEC:
D = AccessSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_FRIEND:
D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_FRIEND_TEMPLATE:
D = FriendTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE:
D = ClassTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_SPECIALIZATION:
D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION:
D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE:
D = VarTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_SPECIALIZATION:
D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION:
D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION:
D = ClassScopeFunctionSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION_TEMPLATE:
D = FunctionTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_TEMPLATE_TYPE_PARM:
D = TemplateTypeParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_NON_TYPE_TEMPLATE_PARM:
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK:
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_TEMPLATE_TEMPLATE_PARM:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_TYPE_ALIAS_TEMPLATE:
D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_STATIC_ASSERT:
D = StaticAssertDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_METHOD:
D = ObjCMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_INTERFACE:
D = ObjCInterfaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IVAR:
D = ObjCIvarDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROTOCOL:
D = ObjCProtocolDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_AT_DEFS_FIELD:
D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY:
D = ObjCCategoryDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY_IMPL:
D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IMPLEMENTATION:
D = ObjCImplementationDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_COMPATIBLE_ALIAS:
D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY:
D = ObjCPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY_IMPL:
D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_FIELD:
D = FieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_INDIRECTFIELD:
D = IndirectFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR:
D = VarDecl::CreateDeserialized(Context, ID);
break;
case DECL_IMPLICIT_PARAM:
D = ImplicitParamDecl::CreateDeserialized(Context, ID);
break;
case DECL_PARM_VAR:
D = ParmVarDecl::CreateDeserialized(Context, ID);
break;
case DECL_DECOMPOSITION:
D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_BINDING:
D = BindingDecl::CreateDeserialized(Context, ID);
break;
case DECL_FILE_SCOPE_ASM:
D = FileScopeAsmDecl::CreateDeserialized(Context, ID);
break;
case DECL_BLOCK:
D = BlockDecl::CreateDeserialized(Context, ID);
break;
case DECL_MS_PROPERTY:
D = MSPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_CAPTURED:
D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_CXX_BASE_SPECIFIERS:
Error("attempt to read a C++ base-specifier record as a declaration");
return nullptr;
case DECL_CXX_CTOR_INITIALIZERS:
Error("attempt to read a C++ ctor initializer record as a declaration");
return nullptr;
case DECL_IMPORT:
// Note: last entry of the ImportDecl record is the number of stored source
// locations.
D = ImportDecl::CreateDeserialized(Context, ID, Record.back());
break;
case DECL_OMP_THREADPRIVATE:
D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_OMP_DECLARE_REDUCTION:
D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID);
break;
case DECL_OMP_CAPTUREDEXPR:
D = OMPCapturedExprDecl::CreateDeserialized(Context, ID);
break;
case DECL_PRAGMA_COMMENT:
D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_PRAGMA_DETECT_MISMATCH:
D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_EMPTY:
D = EmptyDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_TYPE_PARAM:
D = ObjCTypeParamDecl::CreateDeserialized(Context, ID);
break;
}
assert(D && "Unknown declaration reading AST file");
LoadedDecl(Index, D);
// Set the DeclContext before doing any deserialization, to make sure internal
// calls to Decl::getASTContext() by Decl's methods will find the
// TranslationUnitDecl without crashing.
D->setDeclContext(Context.getTranslationUnitDecl());
Reader.Visit(D);
// If this declaration is also a declaration context, get the
// offsets for its tables of lexical and visible declarations.
if (DeclContext *DC = dyn_cast<DeclContext>(D)) {
std::pair<uint64_t, uint64_t> Offsets = Reader.VisitDeclContext(DC);
if (Offsets.first &&
ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC))
return nullptr;
if (Offsets.second &&
ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID))
return nullptr;
}
assert(Record.getIdx() == Record.size());
// Load any relevant update records.
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/true));
// Load the categories after recursive loading is finished.
if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(D))
// If we already have a definition when deserializing the ObjCInterfaceDecl,
// we put the Decl in PendingDefinitions so we can pull the categories here.
if (Class->isThisDeclarationADefinition() ||
PendingDefinitions.count(Class))
loadObjCCategories(ID, Class);
// If we have deserialized a declaration that has a definition the
// AST consumer might need to know about, queue it.
// We don't pass it to the consumer immediately because we may be in recursive
// loading, and some declarations may still be initializing.
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
return D;
}
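/// Drain the set of declarations that may interest the AST consumer: first
/// force deserialization of all eagerly-deserialized decls, then hand each
/// queued decl to the consumer if it is still deemed interesting.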
void ASTReader::PassInterestingDeclsToConsumer() {
assert(Consumer);
if (PassingDeclsToConsumer)
return;
// Guard variable to avoid recursively redoing the process of passing
// decls to the consumer.
SaveAndRestore<bool> GuardPassingDeclsToConsumer(PassingDeclsToConsumer,
true);
// Ensure that we've loaded all potentially-interesting declarations
// that need to be eagerly loaded.
for (auto ID : EagerlyDeserializedDecls)
GetDecl(ID);
EagerlyDeserializedDecls.clear();
while (!PotentiallyInterestingDecls.empty()) {
InterestingDecl D = PotentiallyInterestingDecls.front();
PotentiallyInterestingDecls.pop_front();
if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody()))
PassInterestingDeclToConsumer(D.getDecl());
}
}
void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
// The declaration may have been modified by files later in the chain.
// If this is the case, read the record containing the updates from each file
// and pass it to ASTDeclReader to make the modifications.
serialization::GlobalDeclID ID = Record.ID;
Decl *D = Record.D;
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID);
llvm::SmallVector<serialization::DeclID, 8> PendingLazySpecializationIDs;
if (UpdI != DeclUpdateOffsets.end()) {
auto UpdateOffsets = std::move(UpdI->second);
DeclUpdateOffsets.erase(UpdI);
// Check if this decl was interesting to the consumer. If we just loaded
// the declaration, then we know it was interesting and we skip the call
// to isConsumerInterestedIn because it is unsafe to call in the
// current ASTReader state.
bool WasInteresting =
Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false);
for (auto &FileAndOffset : UpdateOffsets) {
ModuleFile *F = FileAndOffset.first;
uint64_t Offset = FileAndOffset.second;
llvm::BitstreamCursor &Cursor = F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
Cursor.JumpToBit(Offset);
unsigned Code = Cursor.ReadCode();
ASTRecordReader Record(*this, *F);
unsigned RecCode = Record.readRecord(Cursor, Code);
(void)RecCode;
assert(RecCode == DECL_UPDATES && "Expected DECL_UPDATES record!");
ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID,
SourceLocation());
Reader.UpdateDecl(D, PendingLazySpecializationIDs);
// We might have made this declaration interesting. If so, remember that
// we need to hand it off to the consumer.
if (!WasInteresting &&
isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) {
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
WasInteresting = true;
}
}
}
// Add the lazy specializations to the template.
assert((PendingLazySpecializationIDs.empty() || isa<ClassTemplateDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<VarTemplateDecl>(D)) &&
"Must not have pending specializations");
if (auto *CTD = dyn_cast<ClassTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs);
else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs);
else if (auto *VTD = dyn_cast<VarTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs);
PendingLazySpecializationIDs.clear();
// Load the pending visible updates for this decl context, if it has any.
auto I = PendingVisibleUpdates.find(ID);
if (I != PendingVisibleUpdates.end()) {
auto VisibleUpdates = std::move(I->second);
PendingVisibleUpdates.erase(I);
auto *DC = cast<DeclContext>(D)->getPrimaryContext();
for (const PendingVisibleUpdate &Update : VisibleUpdates)
Lookups[DC].Table.add(
Update.Mod, Update.Data,
reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
DC->setHasExternalVisibleStorage(true);
}
}
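/// Complete the redeclaration chain rooted at FirstLocal's canonical decl:
/// attach FirstLocal after the most recent known declaration and, when
/// LocalOffset is non-zero, read this module file's LOCAL_REDECLARATIONS
/// record to attach the remaining redeclarations.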
void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
// Attach FirstLocal to the end of the decl chain.
Decl *CanonDecl = FirstLocal->getCanonicalDecl();
if (FirstLocal != CanonDecl) {
Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl);
ASTDeclReader::attachPreviousDecl(
*this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl,
CanonDecl);
}
if (!LocalOffset) {
ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal);
return;
}
// Load the list of other redeclarations from this module file.
ModuleFile *M = getOwningModuleFile(FirstLocal);
assert(M && "imported decl from no module file");
llvm::BitstreamCursor &Cursor = M->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
Cursor.JumpToBit(LocalOffset);
RecordData Record;
unsigned Code = Cursor.ReadCode();
unsigned RecCode = Cursor.readRecord(Code, Record);
(void)RecCode;
assert(RecCode == LOCAL_REDECLARATIONS && "expected LOCAL_REDECLARATIONS record!");
// FIXME: We have several different dispatches on decl kind here; maybe
// we should instead generate one loop per kind and dispatch up-front?
Decl *MostRecent = FirstLocal;
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
auto *D = GetLocalDecl(*M, Record[N - I - 1]);
ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl);
MostRecent = D;
}
ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent);
}
namespace {
/// \brief Given an ObjC interface, walks the loaded module files and links
/// all of the interface's categories to it.
class ObjCCategoriesVisitor {
ASTReader &Reader;
ObjCInterfaceDecl *Interface;
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized;
ObjCCategoryDecl *Tail;
llvm::DenseMap<DeclarationName, ObjCCategoryDecl *> NameCategoryMap;
serialization::GlobalDeclID InterfaceID;
unsigned PreviousGeneration;
void add(ObjCCategoryDecl *Cat) {
// Only process each category once.
if (!Deserialized.erase(Cat))
return;
// Check for duplicate categories.
if (Cat->getDeclName()) {
ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()];
if (Existing &&
Reader.getOwningModuleFile(Existing)
!= Reader.getOwningModuleFile(Cat)) {
// FIXME: We should not warn for duplicates in diamond:
//
//   MT     //
//  /  \    //
// ML   MR  //
//  \  /    //
//   MB     //
//
// If there are duplicates in ML/MR, there will be a warning when
// creating MB *and* when importing MB. We should not warn when
// importing.
Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def)
<< Interface->getDeclName() << Cat->getDeclName();
Reader.Diag(Existing->getLocation(), diag::note_previous_definition);
} else if (!Existing) {
// Record this category.
Existing = Cat;
}
}
// Add this category to the end of the chain.
if (Tail)
ASTDeclReader::setNextObjCCategory(Tail, Cat);
else
Interface->setCategoryListRaw(Cat);
Tail = Cat;
}
public:
ObjCCategoriesVisitor(ASTReader &Reader,
ObjCInterfaceDecl *Interface,
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized,
serialization::GlobalDeclID InterfaceID,
unsigned PreviousGeneration)
: Reader(Reader), Interface(Interface), Deserialized(Deserialized),
Tail(nullptr), InterfaceID(InterfaceID),
PreviousGeneration(PreviousGeneration)
{
// Populate the name -> category map with the set of known categories.
for (auto *Cat : Interface->known_categories()) {
if (Cat->getDeclName())
NameCategoryMap[Cat->getDeclName()] = Cat;
// Keep track of the tail of the category list.
Tail = Cat;
}
}
bool operator()(ModuleFile &M) {
// If we've loaded all of the category information we care about from
// this module file, we're done.
if (M.Generation <= PreviousGeneration)
return true;
// Map global ID of the definition down to the local ID used in this
// module file. If there is no such mapping, we'll find nothing here
// (or in any module it imports).
DeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID);
if (!LocalID)
return true;
// Perform a binary search to find the local redeclarations for this
// declaration (if any).
const ObjCCategoriesInfo Compare = { LocalID, 0 };
const ObjCCategoriesInfo *Result
= std::lower_bound(M.ObjCCategoriesMap,
M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap,
Compare);
if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap ||
Result->DefinitionID != LocalID) {
// We didn't find anything. If the class definition is in this module
// file, then the module files it depends on cannot have any categories,
// so suppress further lookup.
return Reader.isDeclIDFromModule(InterfaceID, M);
}
// We found something. Dig out all of the categories.
unsigned Offset = Result->Offset;
unsigned N = M.ObjCCategories[Offset];
M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again
for (unsigned I = 0; I != N; ++I)
add(cast_or_null<ObjCCategoryDecl>(
Reader.GetLocalDecl(M, M.ObjCCategories[Offset++])));
return true;
}
};
} // end anonymous namespace
void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID,
ObjCInterfaceDecl *D,
unsigned PreviousGeneration) {
ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID,
PreviousGeneration);
ModuleMgr.visit(Visitor);
}
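/// Apply F to D and, if D has already been merged into a redeclaration
/// chain, to every declaration that follows D on that chain.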
template<typename DeclT, typename Fn>
static void forAllLaterRedecls(DeclT *D, Fn F) {
F(D);
// Check whether we've already merged D into its redeclaration chain.
// If D has not yet been merged, walking back from MostRecent will not
// reach D, so walk the merged redecl chain and see whether D is on it.
auto *MostRecent = D->getMostRecentDecl();
bool Found = false;
for (auto *Redecl = MostRecent; Redecl && !Found;
Redecl = Redecl->getPreviousDecl())
Found = (Redecl == D);
// If this declaration is merged, apply the functor to all later decls.
if (Found) {
for (auto *Redecl = MostRecent; Redecl != D;
Redecl = Redecl->getPreviousDecl())
F(Redecl);
}
}
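/// Apply a DECL_UPDATES record to an already-deserialized declaration.
/// The record is a sequence of (update kind, payload) entries that is
/// read until exhausted.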
void ASTDeclReader::UpdateDecl(Decl *D,
llvm::SmallVectorImpl<serialization::DeclID> &PendingLazySpecializationIDs) {
while (Record.getIdx() < Record.size()) {
switch ((DeclUpdateKind)Record.readInt()) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER: {
auto *RD = cast<CXXRecordDecl>(D);
// FIXME: If we also have an update record for instantiating the
// definition of D, we need that to happen before we get here.
Decl *MD = Record.readDecl();
assert(MD && "couldn't read decl from update record");
// FIXME: We should call addHiddenDecl instead, to add the member
// to its DeclContext.
RD->addedMember(MD);
break;
}
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
// It will be added to the template's lazy specialization set.
PendingLazySpecializationIDs.push_back(ReadDeclID());
break;
case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: {
NamespaceDecl *Anon = ReadDeclAs<NamespaceDecl>();
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
if (!Record.isModule()) {
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(D))
TU->setAnonymousNamespace(Anon);
else
cast<NamespaceDecl>(D)->setAnonymousNamespace(Anon);
}
break;
}
case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
VarDecl *VD = cast<VarDecl>(D);
VD->getMemberSpecializationInfo()->setPointOfInstantiation(
ReadSourceLocation());
uint64_t Val = Record.readInt();
if (Val && !VD->getInit()) {
VD->setInit(Record.readExpr());
if (Val > 1) { // IsInitKnownICE = 1, IsInitNotICE = 2, IsInitICE = 3
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = Val == 3;
}
}
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: {
auto Param = cast<ParmVarDecl>(D);
// We have to read the default argument regardless of whether we use it
// so that hypothetical further update records aren't messed up.
// TODO: Add a function to skip over the next expr record.
auto DefaultArg = Record.readExpr();
// Only apply the update if the parameter still has an uninstantiated
// default argument.
if (Param->hasUninstantiatedDefaultArg())
Param->setDefaultArg(DefaultArg);
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: {
auto FD = cast<FieldDecl>(D);
auto DefaultInit = Record.readExpr();
// Only apply the update if the field still has an uninstantiated
// default member initializer.
if (FD->hasInClassInitializer() && !FD->getInClassInitializer()) {
if (DefaultInit)
FD->setInClassInitializer(DefaultInit);
else
// Instantiation failed. We can get here if we serialized an AST for
// an invalid program.
FD->removeInClassInitializer();
}
break;
}
case UPD_CXX_ADDED_FUNCTION_DEFINITION: {
FunctionDecl *FD = cast<FunctionDecl>(D);
if (Reader.PendingBodies[FD]) {
// FIXME: Maybe check for ODR violations.
// It's safe to stop now because this update record is always last.
return;
}
if (Record.readInt()) {
// Maintain AST consistency: any later redeclarations of this function
// are inline if this one is. (We might have merged another declaration
// into this one.)
forAllLaterRedecls(FD, [](FunctionDecl *FD) {
FD->setImplicitlyInline();
});
}
FD->setInnerLocStart(ReadSourceLocation());
ReadFunctionDefinition(FD);
assert(Record.getIdx() == Record.size() && "lazy body must be last");
break;
}
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
auto *OldDD = RD->getCanonicalDecl()->DefinitionData;
bool HadRealDefinition =
OldDD && (OldDD->Definition != RD ||
!Reader.PendingFakeDefinitionData.count(OldDD));
ReadCXXRecordDefinition(RD, /*Update*/true);
// Visible update is handled separately.
uint64_t LexicalOffset = ReadLocalOffset();
if (!HadRealDefinition && LexicalOffset) {
Record.readLexicalDeclContextStorage(LexicalOffset, RD);
Reader.PendingFakeDefinitionData.erase(OldDD);
}
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
if (MemberSpecializationInfo *MSInfo =
RD->getMemberSpecializationInfo()) {
MSInfo->setTemplateSpecializationKind(TSK);
MSInfo->setPointOfInstantiation(POI);
} else {
ClassTemplateSpecializationDecl *Spec =
cast<ClassTemplateSpecializationDecl>(RD);
Spec->setTemplateSpecializationKind(TSK);
Spec->setPointOfInstantiation(POI);
if (Record.readInt()) {
auto PartialSpec =
ReadDeclAs<ClassTemplatePartialSpecializationDecl>();
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
auto *TemplArgList = TemplateArgumentList::CreateCopy(
Reader.getContext(), TemplArgs);
// FIXME: If we already have a partial specialization set,
// check that it matches.
if (!Spec->getSpecializedTemplateOrPartial()
.is<ClassTemplatePartialSpecializationDecl *>())
Spec->setInstantiationOf(PartialSpec, TemplArgList);
}
}
RD->setTagKind((TagTypeKind)Record.readInt());
RD->setLocation(ReadSourceLocation());
RD->setLocStart(ReadSourceLocation());
RD->setBraceRange(ReadSourceRange());
if (Record.readInt()) {
AttrVec Attrs;
Record.readAttributes(Attrs);
// If the declaration already has attributes, we assume that some other
// AST file already loaded them.
if (!D->hasAttrs())
D->setAttrsImpl(Attrs, Reader.getContext());
}
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE: {
// Set the 'operator delete' directly to avoid emitting another update
// record.
auto *Del = ReadDeclAs<FunctionDecl>();
auto *First = cast<CXXDestructorDecl>(D->getCanonicalDecl());
// FIXME: Check consistency if we have an old and new operator delete.
if (!First->OperatorDelete)
First->OperatorDelete = Del;
break;
}
case UPD_CXX_RESOLVED_EXCEPTION_SPEC: {
FunctionProtoType::ExceptionSpecInfo ESI;
SmallVector<QualType, 8> ExceptionStorage;
Record.readExceptionSpec(ExceptionStorage, ESI);
// Update this declaration's exception specification, if needed.
auto *FD = cast<FunctionDecl>(D);
auto *FPT = FD->getType()->castAs<FunctionProtoType>();
// FIXME: If the exception specification is already present, check that it
// matches.
if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) {
FD->setType(Reader.getContext().getFunctionType(
FPT->getReturnType(), FPT->getParamTypes(),
FPT->getExtProtoInfo().withExceptionSpec(ESI)));
// When we get to the end of deserializing, see if there are other decls
// that we need to propagate this exception specification onto.
Reader.PendingExceptionSpecUpdates.insert(
std::make_pair(FD->getCanonicalDecl(), FD));
}
break;
}
case UPD_CXX_DEDUCED_RETURN_TYPE: {
// FIXME: Also do this when merging redecls.
QualType DeducedResultType = Record.readType();
for (auto *Redecl : merged_redecls(D)) {
// FIXME: If the return type is already deduced, check that it matches.
FunctionDecl *FD = cast<FunctionDecl>(Redecl);
Reader.getContext().adjustDeducedFunctionResultType(FD,
DeducedResultType);
}
break;
}
case UPD_DECL_MARKED_USED: {
// Maintain AST consistency: any later redeclarations are used too.
D->markUsed(Reader.getContext());
break;
}
case UPD_MANGLING_NUMBER:
Reader.getContext().setManglingNumber(cast<NamedDecl>(D),
Record.readInt());
break;
case UPD_STATIC_LOCAL_NUMBER:
Reader.getContext().setStaticLocalNumber(cast<VarDecl>(D),
Record.readInt());
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(Reader.getContext(),
ReadSourceRange()));
break;
case UPD_DECL_EXPORTED: {
unsigned SubmoduleID = readSubmoduleID();
auto *Exported = cast<NamedDecl>(D);
if (auto *TD = dyn_cast<TagDecl>(Exported))
Exported = TD->getDefinition();
Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
Reader.getContext().mergeDefinitionIntoModule(cast<NamedDecl>(Exported),
Owner);
Reader.PendingMergedDefinitionsToDeduplicate.insert(
cast<NamedDecl>(Exported));
} else if (Owner && Owner->NameVisibility != Module::AllVisible) {
// If Owner is made visible at some later point, make this declaration
// visible too.
Reader.HiddenNamesMap[Owner].push_back(Exported);
} else {
// The declaration is now visible.
Exported->setVisibleDespiteOwningModule();
}
break;
}
case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
case UPD_ADDED_ATTR_TO_RECORD:
AttrVec Attrs;
Record.readAttributes(Attrs);
assert(Attrs.size() == 1);
D->addAttr(Attrs[0]);
break;
}
}
}
Index: head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp (revision 322855)
@@ -1,6293 +1,6296 @@
//===--- ASTWriter.cpp - AST File Writer ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTWriter class, which writes AST files.
//
//===----------------------------------------------------------------------===//
#include "clang/Serialization/ASTWriter.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "MultiOnDiskHashTable.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclContextInternals.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MemoryBufferCache.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/SourceManagerInternals.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/Module.h"
#include "clang/Serialization/ModuleFileExtension.h"
#include "clang/Serialization/SerializationDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <limits>
#include <new>
#include <tuple>
#include <utility>
using namespace clang;
using namespace clang::serialization;
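// Helpers that expose a vector's elements as a raw byte string, used when
// emitting table contents as bitstream blobs.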
template <typename T, typename Allocator>
static StringRef bytes(const std::vector<T, Allocator> &v) {
if (v.empty()) return StringRef();
return StringRef(reinterpret_cast<const char*>(&v[0]),
sizeof(T) * v.size());
}
template <typename T>
static StringRef bytes(const SmallVectorImpl<T> &v) {
return StringRef(reinterpret_cast<const char*>(v.data()),
sizeof(T) * v.size());
}
//===----------------------------------------------------------------------===//
// Type serialization
//===----------------------------------------------------------------------===//
namespace clang {
class ASTTypeWriter {
ASTWriter &Writer;
ASTRecordWriter Record;
/// \brief Type code that corresponds to the record generated.
TypeCode Code;
/// \brief Abbreviation to use for the record, if any.
unsigned AbbrevToUse;
public:
ASTTypeWriter(ASTWriter &Writer, ASTWriter::RecordDataImpl &Record)
: Writer(Writer), Record(Writer, Record), Code((TypeCode)0), AbbrevToUse(0) { }
uint64_t Emit() {
return Record.Emit(Code, AbbrevToUse);
}
void Visit(QualType T) {
if (T.hasLocalNonFastQualifiers()) {
Qualifiers Qs = T.getLocalQualifiers();
Record.AddTypeRef(T.getLocalUnqualifiedType());
Record.push_back(Qs.getAsOpaqueValue());
Code = TYPE_EXT_QUAL;
AbbrevToUse = Writer.TypeExtQualAbbrev;
} else {
switch (T->getTypeClass()) {
// For all of the concrete, non-dependent types, call the
// appropriate visitor function.
#define TYPE(Class, Base) \
case Type::Class: Visit##Class##Type(cast<Class##Type>(T)); break;
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.def"
}
}
}
void VisitArrayType(const ArrayType *T);
void VisitFunctionType(const FunctionType *T);
void VisitTagType(const TagType *T);
#define TYPE(Class, Base) void Visit##Class##Type(const Class##Type *T);
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.def"
};
} // end namespace clang
void ASTTypeWriter::VisitBuiltinType(const BuiltinType *T) {
llvm_unreachable("Built-in types are never serialized");
}
void ASTTypeWriter::VisitComplexType(const ComplexType *T) {
Record.AddTypeRef(T->getElementType());
Code = TYPE_COMPLEX;
}
void ASTTypeWriter::VisitPointerType(const PointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_POINTER;
}
void ASTTypeWriter::VisitDecayedType(const DecayedType *T) {
Record.AddTypeRef(T->getOriginalType());
Code = TYPE_DECAYED;
}
void ASTTypeWriter::VisitAdjustedType(const AdjustedType *T) {
Record.AddTypeRef(T->getOriginalType());
Record.AddTypeRef(T->getAdjustedType());
Code = TYPE_ADJUSTED;
}
void ASTTypeWriter::VisitBlockPointerType(const BlockPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_BLOCK_POINTER;
}
void ASTTypeWriter::VisitLValueReferenceType(const LValueReferenceType *T) {
Record.AddTypeRef(T->getPointeeTypeAsWritten());
Record.push_back(T->isSpelledAsLValue());
Code = TYPE_LVALUE_REFERENCE;
}
void ASTTypeWriter::VisitRValueReferenceType(const RValueReferenceType *T) {
Record.AddTypeRef(T->getPointeeTypeAsWritten());
Code = TYPE_RVALUE_REFERENCE;
}
void ASTTypeWriter::VisitMemberPointerType(const MemberPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Record.AddTypeRef(QualType(T->getClass(), 0));
Code = TYPE_MEMBER_POINTER;
}
void ASTTypeWriter::VisitArrayType(const ArrayType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->getSizeModifier()); // FIXME: stable values
Record.push_back(T->getIndexTypeCVRQualifiers()); // FIXME: stable values
}
void ASTTypeWriter::VisitConstantArrayType(const ConstantArrayType *T) {
VisitArrayType(T);
Record.AddAPInt(T->getSize());
Code = TYPE_CONSTANT_ARRAY;
}
void ASTTypeWriter::VisitIncompleteArrayType(const IncompleteArrayType *T) {
VisitArrayType(T);
Code = TYPE_INCOMPLETE_ARRAY;
}
void ASTTypeWriter::VisitVariableArrayType(const VariableArrayType *T) {
VisitArrayType(T);
Record.AddSourceLocation(T->getLBracketLoc());
Record.AddSourceLocation(T->getRBracketLoc());
Record.AddStmt(T->getSizeExpr());
Code = TYPE_VARIABLE_ARRAY;
}
void ASTTypeWriter::VisitVectorType(const VectorType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->getNumElements());
Record.push_back(T->getVectorKind());
Code = TYPE_VECTOR;
}
void ASTTypeWriter::VisitExtVectorType(const ExtVectorType *T) {
VisitVectorType(T);
Code = TYPE_EXT_VECTOR;
}
void ASTTypeWriter::VisitFunctionType(const FunctionType *T) {
Record.AddTypeRef(T->getReturnType());
FunctionType::ExtInfo C = T->getExtInfo();
Record.push_back(C.getNoReturn());
Record.push_back(C.getHasRegParm());
Record.push_back(C.getRegParm());
// FIXME: need to stabilize encoding of calling convention...
Record.push_back(C.getCC());
Record.push_back(C.getProducesResult());
Record.push_back(C.getNoCallerSavedRegs());
if (C.getHasRegParm() || C.getRegParm() || C.getProducesResult())
AbbrevToUse = 0;
}
void ASTTypeWriter::VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
VisitFunctionType(T);
Code = TYPE_FUNCTION_NO_PROTO;
}
static void addExceptionSpec(const FunctionProtoType *T,
ASTRecordWriter &Record) {
Record.push_back(T->getExceptionSpecType());
if (T->getExceptionSpecType() == EST_Dynamic) {
Record.push_back(T->getNumExceptions());
for (unsigned I = 0, N = T->getNumExceptions(); I != N; ++I)
Record.AddTypeRef(T->getExceptionType(I));
} else if (T->getExceptionSpecType() == EST_ComputedNoexcept) {
Record.AddStmt(T->getNoexceptExpr());
} else if (T->getExceptionSpecType() == EST_Uninstantiated) {
Record.AddDeclRef(T->getExceptionSpecDecl());
Record.AddDeclRef(T->getExceptionSpecTemplate());
} else if (T->getExceptionSpecType() == EST_Unevaluated) {
Record.AddDeclRef(T->getExceptionSpecDecl());
}
}
void ASTTypeWriter::VisitFunctionProtoType(const FunctionProtoType *T) {
VisitFunctionType(T);
Record.push_back(T->isVariadic());
Record.push_back(T->hasTrailingReturn());
Record.push_back(T->getTypeQuals());
Record.push_back(static_cast<unsigned>(T->getRefQualifier()));
addExceptionSpec(T, Record);
Record.push_back(T->getNumParams());
for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
Record.AddTypeRef(T->getParamType(I));
if (T->hasExtParameterInfos()) {
for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
Record.push_back(T->getExtParameterInfo(I).getOpaqueValue());
}
if (T->isVariadic() || T->hasTrailingReturn() || T->getTypeQuals() ||
T->getRefQualifier() || T->getExceptionSpecType() != EST_None ||
T->hasExtParameterInfos())
AbbrevToUse = 0;
Code = TYPE_FUNCTION_PROTO;
}
void ASTTypeWriter::VisitUnresolvedUsingType(const UnresolvedUsingType *T) {
Record.AddDeclRef(T->getDecl());
Code = TYPE_UNRESOLVED_USING;
}
void ASTTypeWriter::VisitTypedefType(const TypedefType *T) {
Record.AddDeclRef(T->getDecl());
assert(!T->isCanonicalUnqualified() && "Invalid typedef ?");
Record.AddTypeRef(T->getCanonicalTypeInternal());
Code = TYPE_TYPEDEF;
}
void ASTTypeWriter::VisitTypeOfExprType(const TypeOfExprType *T) {
Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_TYPEOF_EXPR;
}
void ASTTypeWriter::VisitTypeOfType(const TypeOfType *T) {
Record.AddTypeRef(T->getUnderlyingType());
Code = TYPE_TYPEOF;
}
void ASTTypeWriter::VisitDecltypeType(const DecltypeType *T) {
Record.AddTypeRef(T->getUnderlyingType());
Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_DECLTYPE;
}
void ASTTypeWriter::VisitUnaryTransformType(const UnaryTransformType *T) {
Record.AddTypeRef(T->getBaseType());
Record.AddTypeRef(T->getUnderlyingType());
Record.push_back(T->getUTTKind());
Code = TYPE_UNARY_TRANSFORM;
}
void ASTTypeWriter::VisitAutoType(const AutoType *T) {
Record.AddTypeRef(T->getDeducedType());
Record.push_back((unsigned)T->getKeyword());
if (T->getDeducedType().isNull())
Record.push_back(T->isDependentType());
Code = TYPE_AUTO;
}
void ASTTypeWriter::VisitDeducedTemplateSpecializationType(
const DeducedTemplateSpecializationType *T) {
Record.AddTemplateName(T->getTemplateName());
Record.AddTypeRef(T->getDeducedType());
if (T->getDeducedType().isNull())
Record.push_back(T->isDependentType());
Code = TYPE_DEDUCED_TEMPLATE_SPECIALIZATION;
}
void ASTTypeWriter::VisitTagType(const TagType *T) {
Record.push_back(T->isDependentType());
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
assert(!T->isBeingDefined() &&
"Cannot serialize in the middle of a type definition");
}
void ASTTypeWriter::VisitRecordType(const RecordType *T) {
VisitTagType(T);
Code = TYPE_RECORD;
}
void ASTTypeWriter::VisitEnumType(const EnumType *T) {
VisitTagType(T);
Code = TYPE_ENUM;
}
void ASTTypeWriter::VisitAttributedType(const AttributedType *T) {
Record.AddTypeRef(T->getModifiedType());
Record.AddTypeRef(T->getEquivalentType());
Record.push_back(T->getAttrKind());
Code = TYPE_ATTRIBUTED;
}
void
ASTTypeWriter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
Record.AddTypeRef(T->getReplacementType());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitSubstTemplateTypeParmPackType(
const SubstTemplateTypeParmPackType *T) {
Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
Record.AddTemplateArgument(T->getArgumentPack());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK;
}
void
ASTTypeWriter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
Record.push_back(T->isDependentType());
Record.AddTemplateName(T->getTemplateName());
Record.push_back(T->getNumArgs());
for (const auto &ArgI : *T)
Record.AddTemplateArgument(ArgI);
Record.AddTypeRef(T->isTypeAlias() ? T->getAliasedType()
: T->isCanonicalUnqualified()
? QualType()
: T->getCanonicalTypeInternal());
Code = TYPE_TEMPLATE_SPECIALIZATION;
}
void
ASTTypeWriter::VisitDependentSizedArrayType(const DependentSizedArrayType *T) {
VisitArrayType(T);
Record.AddStmt(T->getSizeExpr());
Record.AddSourceRange(T->getBracketsRange());
Code = TYPE_DEPENDENT_SIZED_ARRAY;
}
void
ASTTypeWriter::VisitDependentSizedExtVectorType(
const DependentSizedExtVectorType *T) {
Record.AddTypeRef(T->getElementType());
Record.AddStmt(T->getSizeExpr());
Record.AddSourceLocation(T->getAttributeLoc());
Code = TYPE_DEPENDENT_SIZED_EXT_VECTOR;
}
void
ASTTypeWriter::VisitTemplateTypeParmType(const TemplateTypeParmType *T) {
Record.push_back(T->getDepth());
Record.push_back(T->getIndex());
Record.push_back(T->isParameterPack());
Record.AddDeclRef(T->getDecl());
Code = TYPE_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitDependentNameType(const DependentNameType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddIdentifierRef(T->getIdentifier());
Record.AddTypeRef(
T->isCanonicalUnqualified() ? QualType() : T->getCanonicalTypeInternal());
Code = TYPE_DEPENDENT_NAME;
}
void
ASTTypeWriter::VisitDependentTemplateSpecializationType(
const DependentTemplateSpecializationType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddIdentifierRef(T->getIdentifier());
Record.push_back(T->getNumArgs());
for (const auto &I : *T)
Record.AddTemplateArgument(I);
Code = TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION;
}
void ASTTypeWriter::VisitPackExpansionType(const PackExpansionType *T) {
Record.AddTypeRef(T->getPattern());
if (Optional<unsigned> NumExpansions = T->getNumExpansions())
Record.push_back(*NumExpansions + 1);
else
Record.push_back(0);
Code = TYPE_PACK_EXPANSION;
}
void ASTTypeWriter::VisitParenType(const ParenType *T) {
Record.AddTypeRef(T->getInnerType());
Code = TYPE_PAREN;
}
void ASTTypeWriter::VisitElaboratedType(const ElaboratedType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddTypeRef(T->getNamedType());
Code = TYPE_ELABORATED;
}
void ASTTypeWriter::VisitInjectedClassNameType(const InjectedClassNameType *T) {
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
Record.AddTypeRef(T->getInjectedSpecializationType());
Code = TYPE_INJECTED_CLASS_NAME;
}
void ASTTypeWriter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
Code = TYPE_OBJC_INTERFACE;
}
void ASTTypeWriter::VisitObjCTypeParamType(const ObjCTypeParamType *T) {
Record.AddDeclRef(T->getDecl());
Record.push_back(T->getNumProtocols());
for (const auto *I : T->quals())
Record.AddDeclRef(I);
Code = TYPE_OBJC_TYPE_PARAM;
}
void ASTTypeWriter::VisitObjCObjectType(const ObjCObjectType *T) {
Record.AddTypeRef(T->getBaseType());
Record.push_back(T->getTypeArgsAsWritten().size());
for (auto TypeArg : T->getTypeArgsAsWritten())
Record.AddTypeRef(TypeArg);
Record.push_back(T->getNumProtocols());
for (const auto *I : T->quals())
Record.AddDeclRef(I);
Record.push_back(T->isKindOfTypeAsWritten());
Code = TYPE_OBJC_OBJECT;
}
void
ASTTypeWriter::VisitObjCObjectPointerType(const ObjCObjectPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_OBJC_OBJECT_POINTER;
}
void
ASTTypeWriter::VisitAtomicType(const AtomicType *T) {
Record.AddTypeRef(T->getValueType());
Code = TYPE_ATOMIC;
}
void
ASTTypeWriter::VisitPipeType(const PipeType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->isReadOnly());
Code = TYPE_PIPE;
}
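// TypeLocWriter emits the source-location data that accompanies each
// serialized type, mirroring the TypeLoc hierarchy.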
namespace {
class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> {
ASTRecordWriter &Record;
public:
TypeLocWriter(ASTRecordWriter &Record)
: Record(Record) { }
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
void Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc);
#include "clang/AST/TypeLocNodes.def"
void VisitArrayTypeLoc(ArrayTypeLoc TyLoc);
void VisitFunctionTypeLoc(FunctionTypeLoc TyLoc);
};
} // end anonymous namespace
void TypeLocWriter::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
Record.AddSourceLocation(TL.getBuiltinLoc());
if (TL.needsExtraLocalData()) {
Record.push_back(TL.getWrittenTypeSpec());
Record.push_back(TL.getWrittenSignSpec());
Record.push_back(TL.getWrittenWidthSpec());
Record.push_back(TL.hasModeAttr());
}
}
void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitDecayedTypeLoc(DecayedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getCaretLoc());
}
void TypeLocWriter::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
Record.AddSourceLocation(TL.getAmpLoc());
}
void TypeLocWriter::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
Record.AddSourceLocation(TL.getAmpAmpLoc());
}
void TypeLocWriter::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
Record.AddTypeSourceInfo(TL.getClassTInfo());
}
void TypeLocWriter::VisitArrayTypeLoc(ArrayTypeLoc TL) {
Record.AddSourceLocation(TL.getLBracketLoc());
Record.AddSourceLocation(TL.getRBracketLoc());
Record.push_back(TL.getSizeExpr() ? 1 : 0);
if (TL.getSizeExpr())
Record.AddStmt(TL.getSizeExpr());
}
void TypeLocWriter::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitIncompleteArrayTypeLoc(IncompleteArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitVariableArrayTypeLoc(VariableArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitDependentSizedArrayTypeLoc(
DependentSizedArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc(
DependentSizedExtVectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitVectorTypeLoc(VectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) {
Record.AddSourceLocation(TL.getLocalRangeBegin());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddSourceRange(TL.getExceptionSpecRange());
Record.AddSourceLocation(TL.getLocalRangeEnd());
for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i)
Record.AddDeclRef(TL.getParam(i));
}
void TypeLocWriter::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocWriter::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) {
if (TL.getNumProtocols()) {
Record.AddSourceLocation(TL.getProtocolLAngleLoc());
Record.AddSourceLocation(TL.getProtocolRAngleLoc());
}
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
Record.AddSourceLocation(TL.getProtocolLoc(i));
}
void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
Record.AddSourceLocation(TL.getTypeofLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
Record.AddSourceLocation(TL.getTypeofLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDeducedTemplateSpecializationTypeLoc(
DeducedTemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getTemplateNameLoc());
}
void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitEnumTypeLoc(EnumTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitAttributedTypeLoc(AttributedTypeLoc TL) {
Record.AddSourceLocation(TL.getAttrNameLoc());
if (TL.hasAttrOperand()) {
SourceRange range = TL.getAttrOperandParensRange();
Record.AddSourceLocation(range.getBegin());
Record.AddSourceLocation(range.getEnd());
}
if (TL.hasAttrExprOperand()) {
Expr *operand = TL.getAttrExprOperand();
Record.push_back(operand ? 1 : 0);
if (operand) Record.AddStmt(operand);
} else if (TL.hasAttrEnumOperand()) {
Record.AddSourceLocation(TL.getAttrEnumOperandLoc());
}
}
void TypeLocWriter::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmTypeLoc(
SubstTemplateTypeParmTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmPackTypeLoc(
SubstTemplateTypeParmPackTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTemplateSpecializationTypeLoc(
TemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getTemplateKeywordLoc());
Record.AddSourceLocation(TL.getTemplateNameLoc());
Record.AddSourceLocation(TL.getLAngleLoc());
Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
Record.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(),
TL.getArgLoc(i).getLocInfo());
}
void TypeLocWriter::VisitParenTypeLoc(ParenTypeLoc TL) {
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
}
void TypeLocWriter::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
Record.AddSourceLocation(TL.getTemplateKeywordLoc());
Record.AddSourceLocation(TL.getTemplateNameLoc());
Record.AddSourceLocation(TL.getLAngleLoc());
Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(),
TL.getArgLoc(I).getLocInfo());
}
void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
Record.AddSourceLocation(TL.getEllipsisLoc());
}
void TypeLocWriter::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
Record.push_back(TL.hasBaseTypeAsWritten());
Record.AddSourceLocation(TL.getTypeArgsLAngleLoc());
Record.AddSourceLocation(TL.getTypeArgsRAngleLoc());
for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i)
Record.AddTypeSourceInfo(TL.getTypeArgTInfo(i));
Record.AddSourceLocation(TL.getProtocolLAngleLoc());
Record.AddSourceLocation(TL.getProtocolRAngleLoc());
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
Record.AddSourceLocation(TL.getProtocolLoc(i));
}
void TypeLocWriter::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
}
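// Abbreviations give common record shapes a compact bitstream encoding.
// Fields pinned to a literal value (e.g. BitCodeAbbrevOp(0)) occupy no
// bits, so an abbreviation only applies when the emitted values match;
// otherwise the type writers above reset AbbrevToUse to 0 and fall back
// to the generic encoding.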
void ASTWriter::WriteTypeAbbrevs() {
using namespace llvm;
std::shared_ptr<BitCodeAbbrev> Abv;
// Abbreviation for TYPE_EXT_QUAL
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(BitCodeAbbrevOp(serialization::TYPE_EXT_QUAL));
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 3)); // Quals
TypeExtQualAbbrev = Stream.EmitAbbrev(std::move(Abv));
// Abbreviation for TYPE_FUNCTION_PROTO
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(BitCodeAbbrevOp(serialization::TYPE_FUNCTION_PROTO));
// FunctionType
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ReturnType
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // NoReturn
Abv->Add(BitCodeAbbrevOp(0)); // HasRegParm
Abv->Add(BitCodeAbbrevOp(0)); // RegParm
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // CC
Abv->Add(BitCodeAbbrevOp(0)); // ProducesResult
Abv->Add(BitCodeAbbrevOp(0)); // NoCallerSavedRegs
// FunctionProtoType
Abv->Add(BitCodeAbbrevOp(0)); // IsVariadic
Abv->Add(BitCodeAbbrevOp(0)); // HasTrailingReturn
Abv->Add(BitCodeAbbrevOp(0)); // TypeQuals
Abv->Add(BitCodeAbbrevOp(0)); // RefQualifier
Abv->Add(BitCodeAbbrevOp(EST_None)); // ExceptionSpec
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumParams
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Params
TypeFunctionProtoAbbrev = Stream.EmitAbbrev(std::move(Abv));
}
//===----------------------------------------------------------------------===//
// ASTWriter Implementation
//===----------------------------------------------------------------------===//
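// BLOCKINFO helpers: record human-readable names for block and record IDs
// so that tools such as llvm-bcanalyzer can pretty-print the AST file.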
static void EmitBlockID(unsigned ID, const char *Name,
llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
Record.clear();
Record.push_back(ID);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
// Emit the block name if present.
if (!Name || Name[0] == 0)
return;
Record.clear();
while (*Name)
Record.push_back(*Name++);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
}
static void EmitRecordID(unsigned ID, const char *Name,
llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
Record.clear();
Record.push_back(ID);
while (*Name)
Record.push_back(*Name++);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
}
static void AddStmtsExprs(llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
#define RECORD(X) EmitRecordID(X, #X, Stream, Record)
RECORD(STMT_STOP);
RECORD(STMT_NULL_PTR);
RECORD(STMT_REF_PTR);
RECORD(STMT_NULL);
RECORD(STMT_COMPOUND);
RECORD(STMT_CASE);
RECORD(STMT_DEFAULT);
RECORD(STMT_LABEL);
RECORD(STMT_ATTRIBUTED);
RECORD(STMT_IF);
RECORD(STMT_SWITCH);
RECORD(STMT_WHILE);
RECORD(STMT_DO);
RECORD(STMT_FOR);
RECORD(STMT_GOTO);
RECORD(STMT_INDIRECT_GOTO);
RECORD(STMT_CONTINUE);
RECORD(STMT_BREAK);
RECORD(STMT_RETURN);
RECORD(STMT_DECL);
RECORD(STMT_GCCASM);
RECORD(STMT_MSASM);
RECORD(EXPR_PREDEFINED);
RECORD(EXPR_DECL_REF);
RECORD(EXPR_INTEGER_LITERAL);
RECORD(EXPR_FLOATING_LITERAL);
RECORD(EXPR_IMAGINARY_LITERAL);
RECORD(EXPR_STRING_LITERAL);
RECORD(EXPR_CHARACTER_LITERAL);
RECORD(EXPR_PAREN);
RECORD(EXPR_PAREN_LIST);
RECORD(EXPR_UNARY_OPERATOR);
RECORD(EXPR_SIZEOF_ALIGN_OF);
RECORD(EXPR_ARRAY_SUBSCRIPT);
RECORD(EXPR_CALL);
RECORD(EXPR_MEMBER);
RECORD(EXPR_BINARY_OPERATOR);
RECORD(EXPR_COMPOUND_ASSIGN_OPERATOR);
RECORD(EXPR_CONDITIONAL_OPERATOR);
RECORD(EXPR_IMPLICIT_CAST);
RECORD(EXPR_CSTYLE_CAST);
RECORD(EXPR_COMPOUND_LITERAL);
RECORD(EXPR_EXT_VECTOR_ELEMENT);
RECORD(EXPR_INIT_LIST);
RECORD(EXPR_DESIGNATED_INIT);
RECORD(EXPR_DESIGNATED_INIT_UPDATE);
RECORD(EXPR_IMPLICIT_VALUE_INIT);
RECORD(EXPR_NO_INIT);
RECORD(EXPR_VA_ARG);
RECORD(EXPR_ADDR_LABEL);
RECORD(EXPR_STMT);
RECORD(EXPR_CHOOSE);
RECORD(EXPR_GNU_NULL);
RECORD(EXPR_SHUFFLE_VECTOR);
RECORD(EXPR_BLOCK);
RECORD(EXPR_GENERIC_SELECTION);
RECORD(EXPR_OBJC_STRING_LITERAL);
RECORD(EXPR_OBJC_BOXED_EXPRESSION);
RECORD(EXPR_OBJC_ARRAY_LITERAL);
RECORD(EXPR_OBJC_DICTIONARY_LITERAL);
RECORD(EXPR_OBJC_ENCODE);
RECORD(EXPR_OBJC_SELECTOR_EXPR);
RECORD(EXPR_OBJC_PROTOCOL_EXPR);
RECORD(EXPR_OBJC_IVAR_REF_EXPR);
RECORD(EXPR_OBJC_PROPERTY_REF_EXPR);
RECORD(EXPR_OBJC_KVC_REF_EXPR);
RECORD(EXPR_OBJC_MESSAGE_EXPR);
RECORD(STMT_OBJC_FOR_COLLECTION);
RECORD(STMT_OBJC_CATCH);
RECORD(STMT_OBJC_FINALLY);
RECORD(STMT_OBJC_AT_TRY);
RECORD(STMT_OBJC_AT_SYNCHRONIZED);
RECORD(STMT_OBJC_AT_THROW);
RECORD(EXPR_OBJC_BOOL_LITERAL);
RECORD(STMT_CXX_CATCH);
RECORD(STMT_CXX_TRY);
RECORD(STMT_CXX_FOR_RANGE);
RECORD(EXPR_CXX_OPERATOR_CALL);
RECORD(EXPR_CXX_MEMBER_CALL);
RECORD(EXPR_CXX_CONSTRUCT);
RECORD(EXPR_CXX_TEMPORARY_OBJECT);
RECORD(EXPR_CXX_STATIC_CAST);
RECORD(EXPR_CXX_DYNAMIC_CAST);
RECORD(EXPR_CXX_REINTERPRET_CAST);
RECORD(EXPR_CXX_CONST_CAST);
RECORD(EXPR_CXX_FUNCTIONAL_CAST);
RECORD(EXPR_USER_DEFINED_LITERAL);
RECORD(EXPR_CXX_STD_INITIALIZER_LIST);
RECORD(EXPR_CXX_BOOL_LITERAL);
RECORD(EXPR_CXX_NULL_PTR_LITERAL);
RECORD(EXPR_CXX_TYPEID_EXPR);
RECORD(EXPR_CXX_TYPEID_TYPE);
RECORD(EXPR_CXX_THIS);
RECORD(EXPR_CXX_THROW);
RECORD(EXPR_CXX_DEFAULT_ARG);
RECORD(EXPR_CXX_DEFAULT_INIT);
RECORD(EXPR_CXX_BIND_TEMPORARY);
RECORD(EXPR_CXX_SCALAR_VALUE_INIT);
RECORD(EXPR_CXX_NEW);
RECORD(EXPR_CXX_DELETE);
RECORD(EXPR_CXX_PSEUDO_DESTRUCTOR);
RECORD(EXPR_EXPR_WITH_CLEANUPS);
RECORD(EXPR_CXX_DEPENDENT_SCOPE_MEMBER);
RECORD(EXPR_CXX_DEPENDENT_SCOPE_DECL_REF);
RECORD(EXPR_CXX_UNRESOLVED_CONSTRUCT);
RECORD(EXPR_CXX_UNRESOLVED_MEMBER);
RECORD(EXPR_CXX_UNRESOLVED_LOOKUP);
RECORD(EXPR_CXX_EXPRESSION_TRAIT);
RECORD(EXPR_CXX_NOEXCEPT);
RECORD(EXPR_OPAQUE_VALUE);
RECORD(EXPR_BINARY_CONDITIONAL_OPERATOR);
RECORD(EXPR_TYPE_TRAIT);
RECORD(EXPR_ARRAY_TYPE_TRAIT);
RECORD(EXPR_PACK_EXPANSION);
RECORD(EXPR_SIZEOF_PACK);
RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM);
RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM_PACK);
RECORD(EXPR_FUNCTION_PARM_PACK);
RECORD(EXPR_MATERIALIZE_TEMPORARY);
RECORD(EXPR_CUDA_KERNEL_CALL);
RECORD(EXPR_CXX_UUIDOF_EXPR);
RECORD(EXPR_CXX_UUIDOF_TYPE);
RECORD(EXPR_LAMBDA);
#undef RECORD
}
void ASTWriter::WriteBlockInfoBlock() {
RecordData Record;
Stream.EnterBlockInfoBlock();
#define BLOCK(X) EmitBlockID(X ## _ID, #X, Stream, Record)
#define RECORD(X) EmitRecordID(X, #X, Stream, Record)
// Control Block.
BLOCK(CONTROL_BLOCK);
RECORD(METADATA);
RECORD(MODULE_NAME);
RECORD(MODULE_DIRECTORY);
RECORD(MODULE_MAP_FILE);
RECORD(IMPORTS);
RECORD(ORIGINAL_FILE);
RECORD(ORIGINAL_PCH_DIR);
RECORD(ORIGINAL_FILE_ID);
RECORD(INPUT_FILE_OFFSETS);
BLOCK(OPTIONS_BLOCK);
RECORD(LANGUAGE_OPTIONS);
RECORD(TARGET_OPTIONS);
RECORD(FILE_SYSTEM_OPTIONS);
RECORD(HEADER_SEARCH_OPTIONS);
RECORD(PREPROCESSOR_OPTIONS);
BLOCK(INPUT_FILES_BLOCK);
RECORD(INPUT_FILE);
// AST Top-Level Block.
BLOCK(AST_BLOCK);
RECORD(TYPE_OFFSET);
RECORD(DECL_OFFSET);
RECORD(IDENTIFIER_OFFSET);
RECORD(IDENTIFIER_TABLE);
RECORD(EAGERLY_DESERIALIZED_DECLS);
RECORD(MODULAR_CODEGEN_DECLS);
RECORD(SPECIAL_TYPES);
RECORD(STATISTICS);
RECORD(TENTATIVE_DEFINITIONS);
RECORD(SELECTOR_OFFSETS);
RECORD(METHOD_POOL);
RECORD(PP_COUNTER_VALUE);
RECORD(SOURCE_LOCATION_OFFSETS);
RECORD(SOURCE_LOCATION_PRELOADS);
RECORD(EXT_VECTOR_DECLS);
RECORD(UNUSED_FILESCOPED_DECLS);
RECORD(PPD_ENTITIES_OFFSETS);
RECORD(VTABLE_USES);
RECORD(REFERENCED_SELECTOR_POOL);
RECORD(TU_UPDATE_LEXICAL);
RECORD(SEMA_DECL_REFS);
RECORD(WEAK_UNDECLARED_IDENTIFIERS);
RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
RECORD(UPDATE_VISIBLE);
RECORD(DECL_UPDATE_OFFSETS);
RECORD(DECL_UPDATES);
RECORD(CUDA_SPECIAL_DECL_REFS);
RECORD(HEADER_SEARCH_TABLE);
RECORD(FP_PRAGMA_OPTIONS);
RECORD(OPENCL_EXTENSIONS);
RECORD(OPENCL_EXTENSION_TYPES);
RECORD(OPENCL_EXTENSION_DECLS);
RECORD(DELEGATING_CTORS);
RECORD(KNOWN_NAMESPACES);
RECORD(MODULE_OFFSET_MAP);
RECORD(SOURCE_MANAGER_LINE_TABLE);
RECORD(OBJC_CATEGORIES_MAP);
RECORD(FILE_SORTED_DECLS);
RECORD(IMPORTED_MODULES);
RECORD(OBJC_CATEGORIES);
RECORD(MACRO_OFFSET);
RECORD(INTERESTING_IDENTIFIERS);
RECORD(UNDEFINED_BUT_USED);
RECORD(LATE_PARSED_TEMPLATE);
RECORD(OPTIMIZE_PRAGMA_OPTIONS);
RECORD(MSSTRUCT_PRAGMA_OPTIONS);
RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS);
RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES);
RECORD(DELETE_EXPRS_TO_ANALYZE);
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
RECORD(PP_CONDITIONAL_STACK);
// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
RECORD(SM_SLOC_FILE_ENTRY);
RECORD(SM_SLOC_BUFFER_ENTRY);
RECORD(SM_SLOC_BUFFER_BLOB);
RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
RECORD(SM_SLOC_EXPANSION_ENTRY);
// Preprocessor Block.
BLOCK(PREPROCESSOR_BLOCK);
RECORD(PP_MACRO_DIRECTIVE_HISTORY);
RECORD(PP_MACRO_FUNCTION_LIKE);
RECORD(PP_MACRO_OBJECT_LIKE);
RECORD(PP_MODULE_MACRO);
RECORD(PP_TOKEN);
// Submodule Block.
BLOCK(SUBMODULE_BLOCK);
RECORD(SUBMODULE_METADATA);
RECORD(SUBMODULE_DEFINITION);
RECORD(SUBMODULE_UMBRELLA_HEADER);
RECORD(SUBMODULE_HEADER);
RECORD(SUBMODULE_TOPHEADER);
RECORD(SUBMODULE_UMBRELLA_DIR);
RECORD(SUBMODULE_IMPORTS);
RECORD(SUBMODULE_EXPORTS);
RECORD(SUBMODULE_REQUIRES);
RECORD(SUBMODULE_EXCLUDED_HEADER);
RECORD(SUBMODULE_LINK_LIBRARY);
RECORD(SUBMODULE_CONFIG_MACRO);
RECORD(SUBMODULE_CONFLICT);
RECORD(SUBMODULE_PRIVATE_HEADER);
RECORD(SUBMODULE_TEXTUAL_HEADER);
RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER);
RECORD(SUBMODULE_INITIALIZERS);
// Comments Block.
BLOCK(COMMENTS_BLOCK);
RECORD(COMMENTS_RAW_COMMENT);
// Decls and Types block.
BLOCK(DECLTYPES_BLOCK);
RECORD(TYPE_EXT_QUAL);
RECORD(TYPE_COMPLEX);
RECORD(TYPE_POINTER);
RECORD(TYPE_BLOCK_POINTER);
RECORD(TYPE_LVALUE_REFERENCE);
RECORD(TYPE_RVALUE_REFERENCE);
RECORD(TYPE_MEMBER_POINTER);
RECORD(TYPE_CONSTANT_ARRAY);
RECORD(TYPE_INCOMPLETE_ARRAY);
RECORD(TYPE_VARIABLE_ARRAY);
RECORD(TYPE_VECTOR);
RECORD(TYPE_EXT_VECTOR);
RECORD(TYPE_FUNCTION_NO_PROTO);
RECORD(TYPE_FUNCTION_PROTO);
RECORD(TYPE_TYPEDEF);
RECORD(TYPE_TYPEOF_EXPR);
RECORD(TYPE_TYPEOF);
RECORD(TYPE_RECORD);
RECORD(TYPE_ENUM);
RECORD(TYPE_OBJC_INTERFACE);
RECORD(TYPE_OBJC_OBJECT_POINTER);
RECORD(TYPE_DECLTYPE);
RECORD(TYPE_ELABORATED);
RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM);
RECORD(TYPE_UNRESOLVED_USING);
RECORD(TYPE_INJECTED_CLASS_NAME);
RECORD(TYPE_OBJC_OBJECT);
RECORD(TYPE_TEMPLATE_TYPE_PARM);
RECORD(TYPE_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_NAME);
RECORD(TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_SIZED_ARRAY);
RECORD(TYPE_PAREN);
RECORD(TYPE_PACK_EXPANSION);
RECORD(TYPE_ATTRIBUTED);
RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK);
RECORD(TYPE_AUTO);
RECORD(TYPE_UNARY_TRANSFORM);
RECORD(TYPE_ATOMIC);
RECORD(TYPE_DECAYED);
RECORD(TYPE_ADJUSTED);
RECORD(TYPE_OBJC_TYPE_PARAM);
RECORD(LOCAL_REDECLARATIONS);
RECORD(DECL_TYPEDEF);
RECORD(DECL_TYPEALIAS);
RECORD(DECL_ENUM);
RECORD(DECL_RECORD);
RECORD(DECL_ENUM_CONSTANT);
RECORD(DECL_FUNCTION);
RECORD(DECL_OBJC_METHOD);
RECORD(DECL_OBJC_INTERFACE);
RECORD(DECL_OBJC_PROTOCOL);
RECORD(DECL_OBJC_IVAR);
RECORD(DECL_OBJC_AT_DEFS_FIELD);
RECORD(DECL_OBJC_CATEGORY);
RECORD(DECL_OBJC_CATEGORY_IMPL);
RECORD(DECL_OBJC_IMPLEMENTATION);
RECORD(DECL_OBJC_COMPATIBLE_ALIAS);
RECORD(DECL_OBJC_PROPERTY);
RECORD(DECL_OBJC_PROPERTY_IMPL);
RECORD(DECL_FIELD);
RECORD(DECL_MS_PROPERTY);
RECORD(DECL_VAR);
RECORD(DECL_IMPLICIT_PARAM);
RECORD(DECL_PARM_VAR);
RECORD(DECL_FILE_SCOPE_ASM);
RECORD(DECL_BLOCK);
RECORD(DECL_CONTEXT_LEXICAL);
RECORD(DECL_CONTEXT_VISIBLE);
RECORD(DECL_NAMESPACE);
RECORD(DECL_NAMESPACE_ALIAS);
RECORD(DECL_USING);
RECORD(DECL_USING_SHADOW);
RECORD(DECL_USING_DIRECTIVE);
RECORD(DECL_UNRESOLVED_USING_VALUE);
RECORD(DECL_UNRESOLVED_USING_TYPENAME);
RECORD(DECL_LINKAGE_SPEC);
RECORD(DECL_CXX_RECORD);
RECORD(DECL_CXX_METHOD);
RECORD(DECL_CXX_CONSTRUCTOR);
RECORD(DECL_CXX_INHERITED_CONSTRUCTOR);
RECORD(DECL_CXX_DESTRUCTOR);
RECORD(DECL_CXX_CONVERSION);
RECORD(DECL_ACCESS_SPEC);
RECORD(DECL_FRIEND);
RECORD(DECL_FRIEND_TEMPLATE);
RECORD(DECL_CLASS_TEMPLATE);
RECORD(DECL_CLASS_TEMPLATE_SPECIALIZATION);
RECORD(DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION);
RECORD(DECL_VAR_TEMPLATE);
RECORD(DECL_VAR_TEMPLATE_SPECIALIZATION);
RECORD(DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION);
RECORD(DECL_FUNCTION_TEMPLATE);
RECORD(DECL_TEMPLATE_TYPE_PARM);
RECORD(DECL_NON_TYPE_TEMPLATE_PARM);
RECORD(DECL_TEMPLATE_TEMPLATE_PARM);
RECORD(DECL_TYPE_ALIAS_TEMPLATE);
RECORD(DECL_STATIC_ASSERT);
RECORD(DECL_CXX_BASE_SPECIFIERS);
RECORD(DECL_CXX_CTOR_INITIALIZERS);
RECORD(DECL_INDIRECTFIELD);
RECORD(DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK);
RECORD(DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK);
RECORD(DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION);
RECORD(DECL_IMPORT);
RECORD(DECL_OMP_THREADPRIVATE);
RECORD(DECL_EMPTY);
RECORD(DECL_OBJC_TYPE_PARAM);
RECORD(DECL_OMP_CAPTUREDEXPR);
RECORD(DECL_PRAGMA_COMMENT);
RECORD(DECL_PRAGMA_DETECT_MISMATCH);
RECORD(DECL_OMP_DECLARE_REDUCTION);
// Statements and Exprs can occur in the Decls and Types block.
AddStmtsExprs(Stream, Record);
BLOCK(PREPROCESSOR_DETAIL_BLOCK);
RECORD(PPD_MACRO_EXPANSION);
RECORD(PPD_MACRO_DEFINITION);
RECORD(PPD_INCLUSION_DIRECTIVE);
// Extension block.
BLOCK(EXTENSION_BLOCK);
RECORD(EXTENSION_METADATA);
BLOCK(UNHASHED_CONTROL_BLOCK);
RECORD(SIGNATURE);
RECORD(DIAGNOSTIC_OPTIONS);
RECORD(DIAG_PRAGMA_MAPPINGS);
#undef RECORD
#undef BLOCK
Stream.ExitBlock();
}
/// \brief Prepares a path for being written to an AST file by converting it
/// to an absolute path and removing nested './'s.
///
/// \return \c true if the path was changed.
static bool cleanPathForOutput(FileManager &FileMgr,
SmallVectorImpl<char> &Path) {
bool Changed = FileMgr.makeAbsolutePath(Path);
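// Note: bitwise '|' rather than '||' is deliberate below, so that
// remove_dots() runs even when makeAbsolutePath() already changed the path.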
return Changed | llvm::sys::path::remove_dots(Path);
}
/// \brief Adjusts the given filename to only write out the portion of the
/// filename that is not part of the system root directory.
///
/// \param Filename the file name to adjust.
///
/// \param BaseDir When non-NULL, the PCH file is a relocatable AST file and
/// the returned filename will be adjusted by this root directory.
///
/// \returns either the original filename (if it needs no adjustment) or the
/// adjusted filename (which points into the @p Filename parameter).
static const char *
adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
assert(Filename && "No file name to adjust?");
if (BaseDir.empty())
return Filename;
// Verify that the filename and the system root have the same prefix.
unsigned Pos = 0;
for (; Filename[Pos] && Pos < BaseDir.size(); ++Pos)
if (Filename[Pos] != BaseDir[Pos])
return Filename; // Prefixes don't match.
// We hit the end of the filename before we hit the end of the system root.
if (!Filename[Pos])
return Filename;
// If there's no path separator either at the end of the base directory or
// immediately after it, then this path isn't within the base directory.
if (!llvm::sys::path::is_separator(Filename[Pos])) {
if (!llvm::sys::path::is_separator(BaseDir.back()))
return Filename;
} else {
// If the file name has a '/' at the current position, skip over the '/'.
// We distinguish relative paths from absolute paths by the
// absence of '/' at the beginning of relative paths.
//
// FIXME: This is wrong. We distinguish them by asking if the path is
// absolute, which isn't the same thing. And there might be multiple '/'s
// in a row. Use a better mechanism to indicate whether we have emitted an
// absolute or relative path.
++Pos;
}
return Filename + Pos;
}
ASTFileSignature ASTWriter::createSignature(StringRef Bytes) {
// Calculate the hash up to the start of UNHASHED_CONTROL_BLOCK.
llvm::SHA1 Hasher;
Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size()));
auto Hash = Hasher.result();
// Convert to an array [5*i32].
ASTFileSignature Signature;
auto LShift = [&](unsigned char Val, unsigned Shift) {
return (uint32_t)Val << Shift;
};
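// SHA1 yields a 20-byte digest; each group of four bytes is packed
// big-endian into one 32-bit word, e.g. digest bytes {0x12, 0x34, 0x56, 0x78}
// become the word 0x12345678.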
for (int I = 0; I != 5; ++I)
Signature[I] = LShift(Hash[I * 4 + 0], 24) | LShift(Hash[I * 4 + 1], 16) |
LShift(Hash[I * 4 + 2], 8) | LShift(Hash[I * 4 + 3], 0);
return Signature;
}
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context) {
// Flush first to prepare the PCM hash (signature).
Stream.FlushToWord();
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
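// FlushToWord() above aligned the stream, so GetCurrentBitNo() is a multiple
// of 8; shifting right by 3 converts the bit position into a byte offset.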
// Enter the block and prepare to write records.
RecordData Record;
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
// For implicit modules, write the hash of the PCM as its signature.
ASTFileSignature Signature;
if (WritingModule &&
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl));
Record.append(Signature.begin(), Signature.end());
Stream.EmitRecord(SIGNATURE, Record);
Record.clear();
}
// Diagnostic options.
const auto &Diags = Context.getDiagnostics();
const DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
#define DIAGOPT(Name, Bits, Default) Record.push_back(DiagOpts.Name);
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
Record.push_back(static_cast<unsigned>(DiagOpts.get##Name()));
#include "clang/Basic/DiagnosticOptions.def"
Record.push_back(DiagOpts.Warnings.size());
for (unsigned I = 0, N = DiagOpts.Warnings.size(); I != N; ++I)
AddString(DiagOpts.Warnings[I], Record);
Record.push_back(DiagOpts.Remarks.size());
for (unsigned I = 0, N = DiagOpts.Remarks.size(); I != N; ++I)
AddString(DiagOpts.Remarks[I], Record);
// Note: we don't serialize the log or serialization file names, because they
// are generally transient files and will almost always be overridden.
Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
// Write out the diagnostic/pragma mappings.
WritePragmaDiagnosticMappings(Diags, /* IsModule = */ WritingModule);
// Leave the options block.
Stream.ExitBlock();
return Signature;
}
/// \brief Write the control block.
void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
StringRef isysroot,
const std::string &OutputFile) {
using namespace llvm;
Stream.EnterSubblock(CONTROL_BLOCK_ID, 5);
RecordData Record;
// Metadata
auto MetadataAbbrev = std::make_shared<BitCodeAbbrev>();
MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA));
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Major
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Minor
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang maj.
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang min.
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Relocatable
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Timestamps
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Errors
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // SVN branch/tag
unsigned MetadataAbbrevCode = Stream.EmitAbbrev(std::move(MetadataAbbrev));
assert((!WritingModule || isysroot.empty()) &&
"writing module as a relocatable PCH?");
{
RecordData::value_type Record[] = {METADATA, VERSION_MAJOR, VERSION_MINOR,
CLANG_VERSION_MAJOR, CLANG_VERSION_MINOR,
!isysroot.empty(), IncludeTimestamps,
ASTHasCompilerErrors};
Stream.EmitRecordWithBlob(MetadataAbbrevCode, Record,
getClangFullRepositoryVersion());
}
if (WritingModule) {
// Module name
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {MODULE_NAME};
Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name);
}
if (WritingModule && WritingModule->Directory) {
SmallString<128> BaseDir(WritingModule->Directory->getName());
cleanPathForOutput(Context.getSourceManager().getFileManager(), BaseDir);
// If the home of the module is the current working directory, then we
// want to pick up the cwd of the build process loading the module, not
// our cwd, when we load this module.
if (!PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModuleMapFileHomeIsCwd ||
WritingModule->Directory->getName() != StringRef(".")) {
// Module directory.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {MODULE_DIRECTORY};
Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir);
}
// Write out all other paths relative to the base directory if possible.
BaseDirectory.assign(BaseDir.begin(), BaseDir.end());
} else if (!isysroot.empty()) {
// Write out paths relative to the sysroot if possible.
BaseDirectory = isysroot;
}
// Module map file
if (WritingModule && WritingModule->Kind == Module::ModuleMapModule) {
Record.clear();
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
AddPath(WritingModule->PresumedModuleMapFile.empty()
? Map.getModuleMapFileForUniquing(WritingModule)->getName()
: StringRef(WritingModule->PresumedModuleMapFile),
Record);
// Additional module map files.
if (auto *AdditionalModMaps =
Map.getAdditionalModuleMapFiles(WritingModule)) {
Record.push_back(AdditionalModMaps->size());
for (const FileEntry *F : *AdditionalModMaps)
AddPath(F->getName(), Record);
} else {
Record.push_back(0);
}
Stream.EmitRecord(MODULE_MAP_FILE, Record);
}
// Imports
if (Chain) {
serialization::ModuleManager &Mgr = Chain->getModuleManager();
Record.clear();
for (ModuleFile &M : Mgr) {
// Skip modules that weren't directly imported.
if (!M.isDirectlyImported())
continue;
Record.push_back((unsigned)M.Kind); // FIXME: Stable encoding
AddSourceLocation(M.ImportLoc, Record);
// If we have a calculated signature, there is no need to store
// the size or timestamp.
Record.push_back(M.Signature ? 0 : M.File->getSize());
Record.push_back(M.Signature ? 0 : getTimestampForOutput(M.File));
for (auto I : M.Signature)
Record.push_back(I);
AddPath(M.FileName, Record);
}
Stream.EmitRecord(IMPORTS, Record);
}
// Write the options block.
Stream.EnterSubblock(OPTIONS_BLOCK_ID, 4);
// Language options.
Record.clear();
const LangOptions &LangOpts = Context.getLangOpts();
#define LANGOPT(Name, Bits, Default, Description) \
Record.push_back(LangOpts.Name);
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
Record.push_back(static_cast<unsigned>(LangOpts.get##Name()));
#include "clang/Basic/LangOptions.def"
#define SANITIZER(NAME, ID) \
Record.push_back(LangOpts.Sanitize.has(SanitizerKind::ID));
#include "clang/Basic/Sanitizers.def"
Record.push_back(LangOpts.ModuleFeatures.size());
for (StringRef Feature : LangOpts.ModuleFeatures)
AddString(Feature, Record);
Record.push_back((unsigned) LangOpts.ObjCRuntime.getKind());
AddVersionTuple(LangOpts.ObjCRuntime.getVersion(), Record);
AddString(LangOpts.CurrentModule, Record);
// Comment options.
Record.push_back(LangOpts.CommentOpts.BlockCommandNames.size());
for (const auto &I : LangOpts.CommentOpts.BlockCommandNames) {
AddString(I, Record);
}
Record.push_back(LangOpts.CommentOpts.ParseAllComments);
// OpenMP offloading options.
Record.push_back(LangOpts.OMPTargetTriples.size());
for (auto &T : LangOpts.OMPTargetTriples)
AddString(T.getTriple(), Record);
AddString(LangOpts.OMPHostIRFile, Record);
Stream.EmitRecord(LANGUAGE_OPTIONS, Record);
// Target options.
Record.clear();
const TargetInfo &Target = Context.getTargetInfo();
const TargetOptions &TargetOpts = Target.getTargetOpts();
AddString(TargetOpts.Triple, Record);
AddString(TargetOpts.CPU, Record);
AddString(TargetOpts.ABI, Record);
Record.push_back(TargetOpts.FeaturesAsWritten.size());
for (unsigned I = 0, N = TargetOpts.FeaturesAsWritten.size(); I != N; ++I) {
AddString(TargetOpts.FeaturesAsWritten[I], Record);
}
Record.push_back(TargetOpts.Features.size());
for (unsigned I = 0, N = TargetOpts.Features.size(); I != N; ++I) {
AddString(TargetOpts.Features[I], Record);
}
Stream.EmitRecord(TARGET_OPTIONS, Record);
// File system options.
Record.clear();
const FileSystemOptions &FSOpts =
Context.getSourceManager().getFileManager().getFileSystemOpts();
AddString(FSOpts.WorkingDir, Record);
Stream.EmitRecord(FILE_SYSTEM_OPTIONS, Record);
// Header search options.
Record.clear();
const HeaderSearchOptions &HSOpts
= PP.getHeaderSearchInfo().getHeaderSearchOpts();
AddString(HSOpts.Sysroot, Record);
// Include entries.
Record.push_back(HSOpts.UserEntries.size());
for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) {
const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I];
AddString(Entry.Path, Record);
Record.push_back(static_cast<unsigned>(Entry.Group));
Record.push_back(Entry.IsFramework);
Record.push_back(Entry.IgnoreSysRoot);
}
// System header prefixes.
Record.push_back(HSOpts.SystemHeaderPrefixes.size());
for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) {
AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record);
Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader);
}
AddString(HSOpts.ResourceDir, Record);
AddString(HSOpts.ModuleCachePath, Record);
AddString(HSOpts.ModuleUserBuildPath, Record);
Record.push_back(HSOpts.DisableModuleHash);
Record.push_back(HSOpts.ImplicitModuleMaps);
Record.push_back(HSOpts.ModuleMapFileHomeIsCwd);
Record.push_back(HSOpts.UseBuiltinIncludes);
Record.push_back(HSOpts.UseStandardSystemIncludes);
Record.push_back(HSOpts.UseStandardCXXIncludes);
Record.push_back(HSOpts.UseLibcxx);
// Write out the specific module cache path that contains the module files.
AddString(PP.getHeaderSearchInfo().getModuleCachePath(), Record);
Stream.EmitRecord(HEADER_SEARCH_OPTIONS, Record);
// Preprocessor options.
Record.clear();
const PreprocessorOptions &PPOpts = PP.getPreprocessorOpts();
// Macro definitions.
Record.push_back(PPOpts.Macros.size());
for (unsigned I = 0, N = PPOpts.Macros.size(); I != N; ++I) {
AddString(PPOpts.Macros[I].first, Record);
Record.push_back(PPOpts.Macros[I].second);
}
// Includes
Record.push_back(PPOpts.Includes.size());
for (unsigned I = 0, N = PPOpts.Includes.size(); I != N; ++I)
AddString(PPOpts.Includes[I], Record);
// Macro includes
Record.push_back(PPOpts.MacroIncludes.size());
for (unsigned I = 0, N = PPOpts.MacroIncludes.size(); I != N; ++I)
AddString(PPOpts.MacroIncludes[I], Record);
Record.push_back(PPOpts.UsePredefines);
// Detailed record is important since it is used for the module cache hash.
Record.push_back(PPOpts.DetailedRecord);
AddString(PPOpts.ImplicitPCHInclude, Record);
AddString(PPOpts.ImplicitPTHInclude, Record);
Record.push_back(static_cast<unsigned>(PPOpts.ObjCXXARCStandardLibrary));
Stream.EmitRecord(PREPROCESSOR_OPTIONS, Record);
// Leave the options block.
Stream.ExitBlock();
// Original file name and file ID
SourceManager &SM = Context.getSourceManager();
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
auto FileAbbrev = std::make_shared<BitCodeAbbrev>();
FileAbbrev->Add(BitCodeAbbrevOp(ORIGINAL_FILE));
FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File ID
FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned FileAbbrevCode = Stream.EmitAbbrev(std::move(FileAbbrev));
Record.clear();
Record.push_back(ORIGINAL_FILE);
Record.push_back(SM.getMainFileID().getOpaqueValue());
EmitRecordWithPath(FileAbbrevCode, Record, MainFile->getName());
}
Record.clear();
Record.push_back(SM.getMainFileID().getOpaqueValue());
Stream.EmitRecord(ORIGINAL_FILE_ID, Record);
// Original PCH directory
if (!OutputFile.empty() && OutputFile != "-") {
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(ORIGINAL_PCH_DIR));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
SmallString<128> OutputPath(OutputFile);
SM.getFileManager().makeAbsolutePath(OutputPath);
StringRef origDir = llvm::sys::path::parent_path(OutputPath);
RecordData::value_type Record[] = {ORIGINAL_PCH_DIR};
Stream.EmitRecordWithBlob(AbbrevCode, Record, origDir);
}
WriteInputFiles(Context.SourceMgr,
PP.getHeaderSearchInfo().getHeaderSearchOpts(),
PP.getLangOpts().Modules);
Stream.ExitBlock();
}
namespace {
/// \brief An input file.
struct InputFileEntry {
const FileEntry *File;
bool IsSystemFile;
bool IsTransient;
bool BufferOverridden;
bool IsTopLevelModuleMap;
};
} // end anonymous namespace
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
HeaderSearchOptions &HSOpts,
bool Modules) {
using namespace llvm;
Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
// Create input-file abbreviation.
auto IFAbbrev = std::make_shared<BitCodeAbbrev>();
IFAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE));
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
// Get all ContentCache objects for files, sorted by whether the file is a
// system one or not. System files go at the back, user files at the front.
std::deque<InputFileEntry> SortedFiles;
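// A deque allows O(1) push_front, so user files can be prepended ahead of
// the system files appended at the back without a separate sort pass.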
for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) {
// Get this source location entry.
const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
assert(&SourceMgr.getSLocEntry(FileID::get(I)) == SLoc);
// We only care about file entries that were not overridden.
if (!SLoc->isFile())
continue;
const SrcMgr::FileInfo &File = SLoc->getFile();
const SrcMgr::ContentCache *Cache = File.getContentCache();
if (!Cache->OrigEntry)
continue;
InputFileEntry Entry;
Entry.File = Cache->OrigEntry;
Entry.IsSystemFile = Cache->IsSystemFile;
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
File.getIncludeLoc().isInvalid();
if (Cache->IsSystemFile)
SortedFiles.push_back(Entry);
else
SortedFiles.push_front(Entry);
}
unsigned UserFilesNum = 0;
// Write out all of the input files.
std::vector<uint64_t> InputFileOffsets;
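// Input file IDs are 1-based: a zero in InputFileIDs means the file has not
// been assigned an ID yet, and the first recorded file gets ID 1.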
for (const auto &Entry : SortedFiles) {
uint32_t &InputFileID = InputFileIDs[Entry.File];
if (InputFileID != 0)
continue; // already recorded this file.
// Record this entry's offset.
InputFileOffsets.push_back(Stream.GetCurrentBitNo());
InputFileID = InputFileOffsets.size();
if (!Entry.IsSystemFile)
++UserFilesNum;
// Emit size/modification time for this file.
// And whether this file was overridden.
RecordData::value_type Record[] = {
INPUT_FILE,
InputFileOffsets.size(),
(uint64_t)Entry.File->getSize(),
(uint64_t)getTimestampForOutput(Entry.File),
Entry.BufferOverridden,
Entry.IsTransient,
Entry.IsTopLevelModuleMap};
EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
}
Stream.ExitBlock();
// Create input file offsets abbreviation.
auto OffsetsAbbrev = std::make_shared<BitCodeAbbrev>();
OffsetsAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_OFFSETS));
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # input files
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # non-system
// input files
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Array
unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(std::move(OffsetsAbbrev));
// Write input file offsets.
RecordData::value_type Record[] = {INPUT_FILE_OFFSETS,
InputFileOffsets.size(), UserFilesNum};
Stream.EmitRecordWithBlob(OffsetsAbbrevCode, Record, bytes(InputFileOffsets));
}
//===----------------------------------------------------------------------===//
// Source Manager Serialization
//===----------------------------------------------------------------------===//
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// file.
static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
// FileEntry fields.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Input File ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 24)); // FirstDeclIndex
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumDecls
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer.
static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer's blob.
static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
bool Compressed) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
: SM_SLOC_BUFFER_BLOB));
if (Compressed)
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a macro
/// expansion.
static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Start location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // End location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length
return Stream.EmitAbbrev(std::move(Abbrev));
}
namespace {
// Trait used for the on-disk hash table of header search information.
class HeaderFileInfoTrait {
ASTWriter &Writer;
// Keep track of the framework names we've used during serialization.
SmallVector<char, 128> FrameworkStringData;
llvm::StringMap<unsigned> FrameworkNameOffset;
public:
HeaderFileInfoTrait(ASTWriter &Writer) : Writer(Writer) {}
struct key_type {
StringRef Filename;
off_t Size;
time_t ModTime;
};
typedef const key_type &key_type_ref;
using UnresolvedModule =
llvm::PointerIntPair<Module *, 2, ModuleMap::ModuleHeaderRole>;
struct data_type {
const HeaderFileInfo &HFI;
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
UnresolvedModule Unresolved;
};
typedef const data_type &data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
hash_value_type ComputeHash(key_type_ref key) {
// The hash is based only on the size/time of the file, so that the reader
// can match even when symlinking or excess path elements ("foo/../", "../")
// change the form of the name. However, the complete path is still the key.
return llvm::hash_combine(key.Size, key.ModTime);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
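// The key is the 8-byte size, the 8-byte modification time, and then the
// filename; the '+1' below covers the trailing NUL that EmitKey writes
// along with the name.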
unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
LE.write<uint16_t>(KeyLen);
unsigned DataLen = 1 + 2 + 4 + 4;
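// Fixed-size payload: 1 byte of flags, 2 bytes for NumIncludes, 4 bytes for
// the controlling macro, and 4 bytes for the framework-name offset; each
// module reference emitted in EmitData adds 4 more bytes.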
for (auto ModInfo : Data.KnownHeaders)
if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
DataLen += 4;
if (Data.Unresolved.getPointer())
DataLen += 4;
LE.write<uint8_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
LE.write<uint64_t>(key.Size);
KeyLen -= 8;
LE.write<uint64_t>(key.ModTime);
KeyLen -= 8;
Out.write(key.Filename.data(), KeyLen);
}
void EmitData(raw_ostream &Out, key_type_ref key,
data_type_ref Data, unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
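// Bit layout of the flags byte: isImport in bit 5, isPragmaOnce in bit 4,
// DirInfo starting at bit 1, and IndexHeaderMapHeader in bit 0.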
unsigned char Flags = (Data.HFI.isImport << 5)
| (Data.HFI.isPragmaOnce << 4)
| (Data.HFI.DirInfo << 1)
| Data.HFI.IndexHeaderMapHeader;
LE.write<uint8_t>(Flags);
LE.write<uint16_t>(Data.HFI.NumIncludes);
if (!Data.HFI.ControllingMacro)
LE.write<uint32_t>(Data.HFI.ControllingMacroID);
else
LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro));
unsigned Offset = 0;
if (!Data.HFI.Framework.empty()) {
// If this header refers into a framework, save the framework name.
llvm::StringMap<unsigned>::iterator Pos
= FrameworkNameOffset.find(Data.HFI.Framework);
if (Pos == FrameworkNameOffset.end()) {
Offset = FrameworkStringData.size() + 1;
FrameworkStringData.append(Data.HFI.Framework.begin(),
Data.HFI.Framework.end());
FrameworkStringData.push_back(0);
FrameworkNameOffset[Data.HFI.Framework] = Offset;
} else
Offset = Pos->second;
}
LE.write<uint32_t>(Offset);
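// Encode each module reference as (submodule ID << 2) | role, keeping the
// ModuleMap::ModuleHeaderRole in the two low bits.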
auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
uint32_t Value = (ModID << 2) | (unsigned)Role;
assert((Value >> 2) == ModID && "overflow in header module info");
LE.write<uint32_t>(Value);
}
};
// FIXME: If the header is excluded, we should write out some
// record of that fact.
for (auto ModInfo : Data.KnownHeaders)
EmitModule(ModInfo.getModule(), ModInfo.getRole());
if (Data.Unresolved.getPointer())
EmitModule(Data.Unresolved.getPointer(), Data.Unresolved.getInt());
assert(Out.tell() - Start == DataLen && "Wrong data length");
}
const char *strings_begin() const { return FrameworkStringData.begin(); }
const char *strings_end() const { return FrameworkStringData.end(); }
};
} // end anonymous namespace
/// \brief Write the header search block for the list of files known to the
/// header search subsystem.
///
/// \param HS The header search structure to save.
void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
HeaderFileInfoTrait GeneratorTrait(*this);
llvm::OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;
SmallVector<const char *, 4> SavedStrings;
unsigned NumHeaderSearchEntries = 0;
// Find all unresolved headers for the current module. We generally will
// have resolved them before we get here, but not necessarily: we might be
// compiling a preprocessed module, where there is no requirement for the
// original files to exist any more.
const HeaderFileInfo Empty; // So we can take a reference.
if (WritingModule) {
llvm::SmallVector<Module *, 16> Worklist(1, WritingModule);
while (!Worklist.empty()) {
Module *M = Worklist.pop_back_val();
if (!M->isAvailable())
continue;
// Map to disk files where possible, to pick up any missing stat
// information. This also means we don't need to check the unresolved
// headers list when emitting resolved headers in the first loop below.
// FIXME: It'd be preferable to avoid doing this if we were given
// sufficient stat information in the module map.
HS.getModuleMap().resolveHeaderDirectives(M);
// If the file didn't exist, we can still create a module if we were given
// enough information in the module map.
for (auto U : M->MissingHeaders) {
// Check that we were given enough information to build a module
// without this file existing on disk.
if (!U.Size || (!U.ModTime && IncludeTimestamps)) {
PP->Diag(U.FileNameLoc, diag::err_module_no_size_mtime_for_header)
<< WritingModule->getFullModuleName() << U.Size.hasValue()
<< U.FileName;
continue;
}
// Form the effective relative pathname for the file.
SmallString<128> Filename(M->Directory->getName());
llvm::sys::path::append(Filename, U.FileName);
PreparePathForOutput(Filename);
StringRef FilenameDup = strdup(Filename.c_str());
SavedStrings.push_back(FilenameDup.data());
HeaderFileInfoTrait::key_type Key = {
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0
};
HeaderFileInfoTrait::data_type Data = {
Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}
};
// FIXME: Deal with cases where there are multiple unresolved header
// directives in different submodules for the same header.
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
}
Worklist.append(M->submodule_begin(), M->submodule_end());
}
}
SmallVector<const FileEntry *, 16> FilesByUID;
HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
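// HeaderSearch only keeps HeaderFileInfo for UIDs below header_file_size();
// drop any later files up front.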
if (FilesByUID.size() > HS.header_file_size())
FilesByUID.resize(HS.header_file_size());
for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
const FileEntry *File = FilesByUID[UID];
if (!File)
continue;
// Get the file info. This will load info from the external source if
// necessary. Skip emitting this file if we have no information on it
// as a header file (in which case HFI will be null) or if it hasn't
// changed since it was loaded. Also skip it if it's for a modular header
// from a different module; in that case, we rely on the module(s)
// containing the header to provide this information.
const HeaderFileInfo *HFI =
HS.getExistingFileInfo(File, /*WantExternal*/!Chain);
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
continue;
// Massage the file path into an appropriate form.
StringRef Filename = File->getName();
SmallString<128> FilenameTmp(Filename);
if (PreparePathForOutput(FilenameTmp)) {
// If we performed any translation on the file name at all, we need to
// save this string, since the generator will refer to it later.
Filename = StringRef(strdup(FilenameTmp.c_str()));
SavedStrings.push_back(Filename.data());
}
HeaderFileInfoTrait::key_type Key = {
Filename, File->getSize(), getTimestampForOutput(File)
};
HeaderFileInfoTrait::data_type Data = {
*HFI, HS.getModuleMap().findAllModulesForHeader(File), {}
};
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
}
// Create the on-disk hash table in a buffer.
SmallString<4096> TableData;
uint32_t BucketOffset;
{
using namespace llvm::support;
llvm::raw_svector_ostream Out(TableData);
// Make sure that no bucket is at offset 0, so that a zero offset can act
// as an empty-bucket sentinel for the reader.
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, GeneratorTrait);
}
// Create a blob abbreviation
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the header search table
RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset,
NumHeaderSearchEntries, TableData.size()};
TableData.append(GeneratorTrait.strings_begin(),GeneratorTrait.strings_end());
Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData);
// Free all of the strings we had to duplicate.
for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I)
free(const_cast<char *>(SavedStrings[I]));
}
static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob,
unsigned SLocBufferBlobCompressedAbbrv,
unsigned SLocBufferBlobAbbrv) {
typedef ASTWriter::RecordData::value_type RecordDataType;
// Compress the buffer if possible. We expect that almost all PCM
// consumers will not need the buffer's contents.
SmallString<0> CompressedBuffer;
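// The incoming Blob includes the trailing NUL the reader expects; compress
// everything but that final byte and record the uncompressed size so the
// reader can rebuild the buffer.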
if (llvm::zlib::isAvailable()) {
llvm::Error E = llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer);
if (!E) {
RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
Blob.size() - 1};
Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
CompressedBuffer);
return;
}
llvm::consumeError(std::move(E));
}
RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB};
Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
}
/// \brief Writes the block containing the serialized form of the
/// source manager.
///
/// TODO: We should probably use an on-disk hash table (stored in a
/// blob), indexed based on the file name, so that we only create
/// entries for files that we actually need. In the common case (no
/// errors), we probably won't have to create file entries for any of
/// the files in the AST.
void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
const Preprocessor &PP) {
RecordData Record;
// Enter the source manager block.
Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
unsigned SLocBufferBlobCompressedAbbrv =
CreateSLocBufferBlobAbbrev(Stream, true);
unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
// Write out the source location entry table. We skip the first
// entry, which is always the same dummy entry.
std::vector<uint32_t> SLocEntryOffsets;
RecordData PreloadSLocs;
SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1);
for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size();
I != N; ++I) {
// Get this source location entry.
const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
FileID FID = FileID::get(I);
assert(&SourceMgr.getSLocEntry(FID) == SLoc);
// Record the offset of this source-location entry.
SLocEntryOffsets.push_back(Stream.GetCurrentBitNo());
// Figure out which record code to use.
unsigned Code;
if (SLoc->isFile()) {
const SrcMgr::ContentCache *Cache = SLoc->getFile().getContentCache();
if (Cache->OrigEntry) {
Code = SM_SLOC_FILE_ENTRY;
} else
Code = SM_SLOC_BUFFER_ENTRY;
} else
Code = SM_SLOC_EXPANSION_ENTRY;
Record.clear();
Record.push_back(Code);
// Starting offset of this entry within this module, so skip the dummy.
Record.push_back(SLoc->getOffset() - 2);
if (SLoc->isFile()) {
const SrcMgr::FileInfo &File = SLoc->getFile();
AddSourceLocation(File.getIncludeLoc(), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
const SrcMgr::ContentCache *Content = File.getContentCache();
bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
"Writing to AST an overridden file is not supported");
// The source location entry is a file. Emit input file ID.
assert(InputFileIDs[Content->OrigEntry] != 0 && "Missed file entry");
Record.push_back(InputFileIDs[Content->OrigEntry]);
Record.push_back(File.NumCreatedFIDs);
FileDeclIDsTy::iterator FDI = FileDeclIDs.find(FID);
if (FDI != FileDeclIDs.end()) {
Record.push_back(FDI->second->FirstDeclIndex);
Record.push_back(FDI->second->DeclIDs.size());
} else {
Record.push_back(0);
Record.push_back(0);
}
Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
if (Content->BufferOverridden || Content->IsTransient)
EmitBlob = true;
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
// We add one to the size so that we capture the trailing NULL
// that is required by llvm::MemoryBuffer::getMemBuffer (on
// the reader side).
const llvm::MemoryBuffer *Buffer
= Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
StringRef Name = Buffer->getBufferIdentifier();
Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
StringRef(Name.data(), Name.size() + 1));
EmitBlob = true;
if (Name == "<built-in>")
PreloadSLocs.push_back(SLocEntryOffsets.size());
}
if (EmitBlob) {
// Include the implicit terminating null character in the on-disk buffer
// if we're writing it uncompressed.
const llvm::MemoryBuffer *Buffer =
Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
emitBlob(Stream, Blob, SLocBufferBlobCompressedAbbrv,
SLocBufferBlobAbbrv);
}
} else {
// The source location entry is a macro expansion.
const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();
AddSourceLocation(Expansion.getSpellingLoc(), Record);
AddSourceLocation(Expansion.getExpansionLocStart(), Record);
AddSourceLocation(Expansion.isMacroArgExpansion()
? SourceLocation()
: Expansion.getExpansionLocEnd(),
Record);
// Compute the token length for this macro expansion.
unsigned NextOffset = SourceMgr.getNextLocalOffset();
if (I + 1 != N)
NextOffset = SourceMgr.getLocalSLocEntry(I + 1).getOffset();
Record.push_back(NextOffset - SLoc->getOffset() - 1);
Stream.EmitRecordWithAbbrev(SLocExpansionAbbrv, Record);
}
}
Stream.ExitBlock();
if (SLocEntryOffsets.empty())
return;
// Write the source-location offsets table into the AST block. This
// table is used for lazily loading source-location information.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets
unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {
SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
SourceMgr.getNextLocalOffset() - 1 /* skip dummy */};
Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
bytes(SLocEntryOffsets));
}
// Write the source location entry preloads array, telling the AST
// reader which source locations entries it should load eagerly.
Stream.EmitRecord(SOURCE_LOCATION_PRELOADS, PreloadSLocs);
// Write the line table. It depends on remapping working, so it must come
// after the source location offsets.
if (SourceMgr.hasLineTable()) {
LineTableInfo &LineTable = SourceMgr.getLineTable();
Record.clear();
// Emit the needed file names.
llvm::DenseMap<int, int> FilenameMap;
for (const auto &L : LineTable) {
if (L.first.ID < 0)
continue;
for (auto &LE : L.second) {
if (FilenameMap.insert(std::make_pair(LE.FilenameID,
FilenameMap.size())).second)
AddPath(LineTable.getFilename(LE.FilenameID), Record);
}
}
Record.push_back(0);
// Emit the line entries
for (const auto &L : LineTable) {
// Only emit entries for local files.
if (L.first.ID < 0)
continue;
// Emit the file ID
Record.push_back(L.first.ID);
// Emit the line entries
Record.push_back(L.second.size());
for (const auto &LE : L.second) {
Record.push_back(LE.FileOffset);
Record.push_back(LE.LineNo);
Record.push_back(FilenameMap[LE.FilenameID]);
Record.push_back((unsigned)LE.FileKind);
Record.push_back(LE.IncludeOffset);
}
}
Stream.EmitRecord(SOURCE_MANAGER_LINE_TABLE, Record);
}
}
//===----------------------------------------------------------------------===//
// Preprocessor Serialization
//===----------------------------------------------------------------------===//
static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
const Preprocessor &PP) {
if (MacroInfo *MI = MD->getMacroInfo())
if (MI->isBuiltinMacro())
return true;
if (IsModule) {
SourceLocation Loc = MD->getLocation();
if (Loc.isInvalid())
return true;
if (PP.getSourceManager().getFileID(Loc) == PP.getPredefinesFileID())
return true;
}
return false;
}
/// \brief Writes the block containing the serialized form of the
/// preprocessor.
///
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
PreprocessingRecord *PPRec = PP.getPreprocessingRecord();
if (PPRec)
WritePreprocessorDetail(*PPRec);
RecordData Record;
RecordData ModuleMacroRecord;
// If the preprocessor __COUNTER__ value has been bumped, remember it.
if (PP.getCounterValue() != 0) {
RecordData::value_type Record[] = {PP.getCounterValue()};
Stream.EmitRecord(PP_COUNTER_VALUE, Record);
}
if (PP.isRecordingPreamble() && PP.hasRecordedPreamble()) {
assert(!IsModule);
for (const auto &Cond : PP.getPreambleConditionalStack()) {
AddSourceLocation(Cond.IfLoc, Record);
Record.push_back(Cond.WasSkipping);
Record.push_back(Cond.FoundNonSkip);
Record.push_back(Cond.FoundElse);
}
Stream.EmitRecord(PP_CONDITIONAL_STACK, Record);
Record.clear();
}
// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
// If the AST file contains __DATE__ or __TIME__, emit a warning about this.
// FIXME: Include a location for the use, and say which one was used.
if (PP.SawDateOrTime())
PP.Diag(SourceLocation(), diag::warn_module_uses_date_time) << IsModule;
// Loop over all the macro directives that are live at the end of the file,
// emitting each to the PP section.
// Construct the list of identifiers with macro directives that need to be
// serialized.
SmallVector<const IdentifierInfo *, 128> MacroIdentifiers;
for (auto &Id : PP.getIdentifierTable())
if (Id.second->hadMacroDefinition() &&
(!Id.second->isFromAST() ||
Id.second->hasChangedSinceDeserialization()))
MacroIdentifiers.push_back(Id.second);
// Sort the set of macro definitions that need to be serialized by the
// name of the macro, to provide a stable ordering.
std::sort(MacroIdentifiers.begin(), MacroIdentifiers.end(),
llvm::less_ptr<IdentifierInfo>());
// Emit the macro directives as a list and associate the offset with the
// identifier they belong to.
for (const IdentifierInfo *Name : MacroIdentifiers) {
MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name);
auto StartOffset = Stream.GetCurrentBitNo();
// Emit the macro directives in reverse source order.
for (; MD; MD = MD->getPrevious()) {
// Once we hit an ignored macro, we're done: the rest of the chain
// will all be ignored macros.
if (shouldIgnoreMacro(MD, IsModule, PP))
break;
AddSourceLocation(MD->getLocation(), Record);
Record.push_back(MD->getKind());
if (auto *DefMD = dyn_cast<DefMacroDirective>(MD)) {
Record.push_back(getMacroRef(DefMD->getInfo(), Name));
} else if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
Record.push_back(VisMD->isPublic());
}
}
// Write out any exported module macros.
bool EmittedModuleMacros = false;
// We write out exported module macros for PCH as well.
auto Leafs = PP.getLeafModuleMacros(Name);
SmallVector<ModuleMacro*, 8> Worklist(Leafs.begin(), Leafs.end());
llvm::DenseMap<ModuleMacro*, unsigned> Visits;
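// Walk from the leaf macros toward the macros they override. Visits counts
// how many overriders of a macro have been emitted; a macro is enqueued
// only once all of them have been, so every macro is emitted after the
// macros that override it.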
while (!Worklist.empty()) {
auto *Macro = Worklist.pop_back_val();
// Emit a record indicating this submodule exports this macro.
ModuleMacroRecord.push_back(
getSubmoduleID(Macro->getOwningModule()));
ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name));
for (auto *M : Macro->overrides())
ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule()));
Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord);
ModuleMacroRecord.clear();
// Enqueue overridden macros once we've visited all their ancestors.
for (auto *M : Macro->overrides())
if (++Visits[M] == M->getNumOverridingMacros())
Worklist.push_back(M);
EmittedModuleMacros = true;
}
if (Record.empty() && !EmittedModuleMacros)
continue;
IdentMacroDirectivesOffsetMap[Name] = StartOffset;
Stream.EmitRecord(PP_MACRO_DIRECTIVE_HISTORY, Record);
Record.clear();
}
// Offsets of each of the macros into the bitstream, indexed by the local
// macro ID.
//
// For each identifier that is associated with a macro, this vector
// provides the offset into the bitstream where that macro is defined.
std::vector<uint32_t> MacroOffsets;
for (unsigned I = 0, N = MacroInfosToEmit.size(); I != N; ++I) {
const IdentifierInfo *Name = MacroInfosToEmit[I].Name;
MacroInfo *MI = MacroInfosToEmit[I].MI;
MacroID ID = MacroInfosToEmit[I].ID;
if (ID < FirstMacroID) {
assert(0 && "Loaded MacroInfo entered MacroInfosToEmit ?");
continue;
}
// Record the local offset of this macro.
unsigned Index = ID - FirstMacroID;
if (Index == MacroOffsets.size())
MacroOffsets.push_back(Stream.GetCurrentBitNo());
else {
if (Index > MacroOffsets.size())
MacroOffsets.resize(Index + 1);
MacroOffsets[Index] = Stream.GetCurrentBitNo();
}
AddIdentifierRef(Name, Record);
AddSourceLocation(MI->getDefinitionLoc(), Record);
AddSourceLocation(MI->getDefinitionEndLoc(), Record);
Record.push_back(MI->isUsed());
Record.push_back(MI->isUsedForHeaderGuard());
unsigned Code;
if (MI->isObjectLike()) {
Code = PP_MACRO_OBJECT_LIKE;
} else {
Code = PP_MACRO_FUNCTION_LIKE;
Record.push_back(MI->isC99Varargs());
Record.push_back(MI->isGNUVarargs());
Record.push_back(MI->hasCommaPasting());
Record.push_back(MI->getNumParams());
for (const IdentifierInfo *Param : MI->params())
AddIdentifierRef(Param, Record);
}
// If we have a detailed preprocessing record, record the macro definition
// ID that corresponds to this macro.
if (PPRec)
Record.push_back(MacroDefinitions[PPRec->findMacroDefinition(MI)]);
Stream.EmitRecord(Code, Record);
Record.clear();
// Emit the tokens array.
for (unsigned TokNo = 0, e = MI->getNumTokens(); TokNo != e; ++TokNo) {
// Note that we know that the preprocessor does not have any annotation
// tokens in it because they are created by the parser, and thus can't
// be in a macro definition.
const Token &Tok = MI->getReplacementToken(TokNo);
AddToken(Tok, Record);
Stream.EmitRecord(PP_TOKEN, Record);
Record.clear();
}
++NumMacros;
}
Stream.ExitBlock();
// Write the offsets table for macro IDs.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
FirstMacroID - NUM_PREDEF_MACRO_IDS};
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
}
}
void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) {
if (PPRec.local_begin() == PPRec.local_end())
return;
SmallVector<PPEntityOffset, 64> PreprocessedEntityOffsets;
// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_DETAIL_BLOCK_ID, 3);
// If the preprocessor has a preprocessing record, emit it.
unsigned NumPreprocessingRecords = 0;
using namespace llvm;
// Set up the abbreviation for inclusion directives.
unsigned InclusionAbbrev = 0;
{
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(PPD_INCLUSION_DIRECTIVE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // filename length
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // in quotes
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // kind
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // imported module
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
}
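// Local preprocessed-entity IDs are numbered after the predefined IDs and
// after every entity loaded from imported AST files, keeping IDs unique
// across the whole module chain.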
unsigned FirstPreprocessorEntityID
= (Chain ? PPRec.getNumLoadedPreprocessedEntities() : 0)
+ NUM_PREDEF_PP_ENTITY_IDS;
unsigned NextPreprocessorEntityID = FirstPreprocessorEntityID;
RecordData Record;
for (PreprocessingRecord::iterator E = PPRec.local_begin(),
EEnd = PPRec.local_end();
E != EEnd;
(void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) {
Record.clear();
PreprocessedEntityOffsets.push_back(
PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo()));
if (auto *MD = dyn_cast<MacroDefinitionRecord>(*E)) {
// Record this macro definition's ID.
MacroDefinitions[MD] = NextPreprocessorEntityID;
AddIdentifierRef(MD->getName(), Record);
Stream.EmitRecord(PPD_MACRO_DEFINITION, Record);
continue;
}
if (auto *ME = dyn_cast<MacroExpansion>(*E)) {
Record.push_back(ME->isBuiltinMacro());
if (ME->isBuiltinMacro())
AddIdentifierRef(ME->getName(), Record);
else
Record.push_back(MacroDefinitions[ME->getDefinition()]);
Stream.EmitRecord(PPD_MACRO_EXPANSION, Record);
continue;
}
if (auto *ID = dyn_cast<InclusionDirective>(*E)) {
Record.push_back(PPD_INCLUSION_DIRECTIVE);
Record.push_back(ID->getFileName().size());
Record.push_back(ID->wasInQuotes());
Record.push_back(static_cast<unsigned>(ID->getKind()));
Record.push_back(ID->importedModule());
SmallString<64> Buffer;
Buffer += ID->getFileName();
// The FileEntry may be null if the inclusion was not resolved; we still
// create a PCH even with compiler errors, so check it before use.
if (ID->getFile())
Buffer += ID->getFile()->getName();
Stream.EmitRecordWithBlob(InclusionAbbrev, Record, Buffer);
continue;
}
llvm_unreachable("Unhandled PreprocessedEntity in ASTWriter");
}
Stream.ExitBlock();
// Write the offsets table for the preprocessing record.
if (NumPreprocessingRecords > 0) {
assert(PreprocessedEntityOffsets.size() == NumPreprocessingRecords);
// Create the abbreviation for the PPD_ENTITIES_OFFSETS record.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS,
FirstPreprocessorEntityID -
NUM_PREDEF_PP_ENTITY_IDS};
Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record,
bytes(PreprocessedEntityOffsets));
}
}
unsigned ASTWriter::getLocalOrImportedSubmoduleID(Module *Mod) {
if (!Mod)
return 0;
llvm::DenseMap<Module *, unsigned>::iterator Known = SubmoduleIDs.find(Mod);
if (Known != SubmoduleIDs.end())
return Known->second;
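// A fresh local ID is handed out only when Mod belongs to the module being
// written, or (when not compiling a PCH) to the top-level module named by
// LangOpts.CurrentModule; anything else that was not imported gets 0.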
auto *Top = Mod->getTopLevelModule();
if (Top != WritingModule &&
(getLangOpts().CompilingPCH ||
!Top->fullModuleNameIs(StringRef(getLangOpts().CurrentModule))))
return 0;
return SubmoduleIDs[Mod] = NextSubmoduleID++;
}
unsigned ASTWriter::getSubmoduleID(Module *Mod) {
// FIXME: This can easily happen, if we have a reference to a submodule that
// did not result in us loading a module file for that submodule. For
// instance, a cross-top-level-module 'conflict' declaration will hit this.
unsigned ID = getLocalOrImportedSubmoduleID(Mod);
assert((ID || !Mod) &&
"asked for module ID for non-local, non-imported module");
return ID;
}
/// \brief Compute the number of modules within the given tree (including the
/// given module).
static unsigned getNumberOfModules(Module *Mod) {
unsigned ChildModules = 0;
for (auto Sub = Mod->submodule_begin(), SubEnd = Mod->submodule_end();
Sub != SubEnd; ++Sub)
ChildModules += getNumberOfModules(*Sub);
return ChildModules + 1;
}
void ASTWriter::WriteSubmodules(Module *WritingModule) {
// Enter the submodule description block.
Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
// Write the abbreviations needed for the submodules block.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExternC
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferSubmodules...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExplicit...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExportWild...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ConfigMacrosExh...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned DefinitionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned UmbrellaAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned HeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TOPHEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned TopHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_DIR));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_REQUIRES));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // State
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Feature
unsigned RequiresAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXCLUDED_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TEXTUAL_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_TEXTUAL_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_LINK_LIBRARY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFIG_MACRO));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name
unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFLICT));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Other module
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Message
unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the submodule metadata block.
RecordData::value_type Record[] = {
getNumberOfModules(WritingModule),
FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS,
(unsigned)WritingModule->Kind};
Stream.EmitRecord(SUBMODULE_METADATA, Record);
// Write all of the submodules.
std::queue<Module *> Q;
Q.push(WritingModule);
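// Breadth-first order guarantees a parent's SUBMODULE_DEFINITION is emitted
// (and its ID assigned) before any of its submodules, which the ParentID
// lookup below depends on.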
while (!Q.empty()) {
Module *Mod = Q.front();
Q.pop();
unsigned ID = getSubmoduleID(Mod);
uint64_t ParentID = 0;
if (Mod->Parent) {
assert(SubmoduleIDs[Mod->Parent] && "Submodule parent not written?");
ParentID = SubmoduleIDs[Mod->Parent];
}
// Emit the definition of the block.
{
RecordData::value_type Record[] = {SUBMODULE_DEFINITION,
ID,
ParentID,
Mod->IsFramework,
Mod->IsExplicit,
Mod->IsSystem,
Mod->IsExternC,
Mod->InferSubmodules,
Mod->InferExplicitSubmodules,
Mod->InferExportWildcard,
Mod->ConfigMacrosExhaustive};
Stream.EmitRecordWithBlob(DefinitionAbbrev, Record, Mod->Name);
}
// Emit the requirements.
for (const auto &R : Mod->Requirements) {
RecordData::value_type Record[] = {SUBMODULE_REQUIRES, R.second};
Stream.EmitRecordWithBlob(RequiresAbbrev, Record, R.first);
}
// Emit the umbrella header, if there is one.
if (auto UmbrellaHeader = Mod->getUmbrellaHeader()) {
RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_HEADER};
Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record,
UmbrellaHeader.NameAsWritten);
} else if (auto UmbrellaDir = Mod->getUmbrellaDir()) {
RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_DIR};
Stream.EmitRecordWithBlob(UmbrellaDirAbbrev, Record,
UmbrellaDir.NameAsWritten);
}
// Emit the headers.
struct {
unsigned RecordKind;
unsigned Abbrev;
Module::HeaderKind HeaderKind;
} HeaderLists[] = {
{SUBMODULE_HEADER, HeaderAbbrev, Module::HK_Normal},
{SUBMODULE_TEXTUAL_HEADER, TextualHeaderAbbrev, Module::HK_Textual},
{SUBMODULE_PRIVATE_HEADER, PrivateHeaderAbbrev, Module::HK_Private},
{SUBMODULE_PRIVATE_TEXTUAL_HEADER, PrivateTextualHeaderAbbrev,
Module::HK_PrivateTextual},
{SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded}
};
for (auto &HL : HeaderLists) {
RecordData::value_type Record[] = {HL.RecordKind};
for (auto &H : Mod->Headers[HL.HeaderKind])
Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten);
}
// Emit the top headers.
{
auto TopHeaders = Mod->getTopHeaders(PP->getFileManager());
RecordData::value_type Record[] = {SUBMODULE_TOPHEADER};
for (auto *H : TopHeaders)
Stream.EmitRecordWithBlob(TopHeaderAbbrev, Record, H->getName());
}
// Emit the imports.
if (!Mod->Imports.empty()) {
RecordData Record;
for (auto *I : Mod->Imports)
Record.push_back(getSubmoduleID(I));
Stream.EmitRecord(SUBMODULE_IMPORTS, Record);
}
// Emit the exports.
if (!Mod->Exports.empty()) {
RecordData Record;
for (const auto &E : Mod->Exports) {
// FIXME: This may fail; we don't require that all exported modules
// are local or imported.
Record.push_back(getSubmoduleID(E.getPointer()));
Record.push_back(E.getInt());
}
Stream.EmitRecord(SUBMODULE_EXPORTS, Record);
}
// FIXME: How do we emit the 'use'd modules? They may not be submodules.
// Might be unnecessary as use declarations are only used to build the
// module itself.
// Emit the link libraries.
for (const auto &LL : Mod->LinkLibraries) {
RecordData::value_type Record[] = {SUBMODULE_LINK_LIBRARY,
LL.IsFramework};
Stream.EmitRecordWithBlob(LinkLibraryAbbrev, Record, LL.Library);
}
// Emit the conflicts.
for (const auto &C : Mod->Conflicts) {
// FIXME: This may fail; we don't require that all conflicting modules
// are local or imported.
RecordData::value_type Record[] = {SUBMODULE_CONFLICT,
getSubmoduleID(C.Other)};
Stream.EmitRecordWithBlob(ConflictAbbrev, Record, C.Message);
}
// Emit the configuration macros.
for (const auto &CM : Mod->ConfigMacros) {
RecordData::value_type Record[] = {SUBMODULE_CONFIG_MACRO};
Stream.EmitRecordWithBlob(ConfigMacroAbbrev, Record, CM);
}
// Emit the initializers, if any.
RecordData Inits;
for (Decl *D : Context->getModuleInitializers(Mod))
Inits.push_back(GetDeclRef(D));
if (!Inits.empty())
Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
// Queue up the submodules of this module.
for (auto *M : Mod->submodules())
Q.push(M);
}
Stream.ExitBlock();
assert((NextSubmoduleID - FirstSubmoduleID ==
getNumberOfModules(WritingModule)) &&
"Wrong # of submodules; found a reference to a non-local, "
"non-imported submodule?");
}
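// Note: submodules are emitted in BFS order starting from WritingModule, so a
// parent's SUBMODULE_DEFINITION (and therefore its submodule ID) is always
// written before any of its children's, which is what the ParentID assertion
// above relies on.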
void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
bool isModule) {
llvm::SmallDenseMap<const DiagnosticsEngine::DiagState *, unsigned, 64>
DiagStateIDMap;
unsigned CurrID = 0;
RecordData Record;
auto EncodeDiagStateFlags =
[](const DiagnosticsEngine::DiagState *DS) -> unsigned {
unsigned Result = (unsigned)DS->ExtBehavior;
for (unsigned Val :
{(unsigned)DS->IgnoreAllWarnings, (unsigned)DS->EnableAllWarnings,
(unsigned)DS->WarningsAsErrors, (unsigned)DS->ErrorsAsFatal,
(unsigned)DS->SuppressSystemWarnings})
Result = (Result << 1) | Val;
return Result;
};
unsigned Flags = EncodeDiagStateFlags(Diag.DiagStatesByLoc.FirstDiagState);
Record.push_back(Flags);
auto AddDiagState = [&](const DiagnosticsEngine::DiagState *State,
bool IncludeNonPragmaStates) {
// Ensure that the diagnostic state wasn't modified since it was created.
// We will not correctly round-trip this information otherwise.
assert(Flags == EncodeDiagStateFlags(State) &&
"diag state flags vary in single AST file");
unsigned &DiagStateID = DiagStateIDMap[State];
Record.push_back(DiagStateID);
if (DiagStateID == 0) {
DiagStateID = ++CurrID;
// Add a placeholder for the number of mappings.
auto SizeIdx = Record.size();
Record.emplace_back();
for (const auto &I : *State) {
if (I.second.isPragma() || IncludeNonPragmaStates) {
Record.push_back(I.first);
Record.push_back(I.second.serialize());
}
}
// Update the placeholder.
Record[SizeIdx] = (Record.size() - SizeIdx) / 2;
}
};
AddDiagState(Diag.DiagStatesByLoc.FirstDiagState, isModule);
// Reserve a spot for the number of locations with state transitions.
auto NumLocationsIdx = Record.size();
Record.emplace_back();
// Emit the state transitions.
unsigned NumLocations = 0;
for (auto &FileIDAndFile : Diag.DiagStatesByLoc.Files) {
if (!FileIDAndFile.first.isValid() ||
!FileIDAndFile.second.HasLocalTransitions)
continue;
++NumLocations;
AddSourceLocation(Diag.SourceMgr->getLocForStartOfFile(FileIDAndFile.first),
Record);
Record.push_back(FileIDAndFile.second.StateTransitions.size());
for (auto &StatePoint : FileIDAndFile.second.StateTransitions) {
Record.push_back(StatePoint.Offset);
AddDiagState(StatePoint.State, false);
}
}
// Backpatch the number of locations.
Record[NumLocationsIdx] = NumLocations;
// Emit CurDiagStateLoc. Do it last in order to match source order.
//
// This also protects against a hypothetical corner case with simulating
// -Werror settings for implicit modules in the ASTReader, where reading
// CurDiagState out of context could change whether warning pragmas are
// treated as errors.
AddSourceLocation(Diag.DiagStatesByLoc.CurDiagStateLoc, Record);
AddDiagState(Diag.DiagStatesByLoc.CurDiagState, false);
Stream.EmitRecord(DIAG_PRAGMA_MAPPINGS, Record);
}
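// A rough sketch of the DIAG_PRAGMA_MAPPINGS record assembled above, in
// emission order (reconstructed from this function, not an authoritative
// format spec):
//   [ Flags,
//     <first diag state>,
//     NumLocations,
//     { FileStartLoc, NumTransitions, { Offset, <state> }... }...,
//     CurDiagStateLoc, <current diag state> ]
// where <state> is a DiagStateID followed, only on its first occurrence, by a
// mapping count and (diag ID, serialized mapping) pairs.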
//===----------------------------------------------------------------------===//
// Type Serialization
//===----------------------------------------------------------------------===//
/// \brief Write the representation of a type to the AST stream.
void ASTWriter::WriteType(QualType T) {
TypeIdx &IdxRef = TypeIdxs[T];
if (IdxRef.getIndex() == 0) // we haven't seen this type before.
IdxRef = TypeIdx(NextTypeID++);
TypeIdx Idx = IdxRef;
assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
RecordData Record;
// Emit the type's representation.
ASTTypeWriter W(*this, Record);
W.Visit(T);
uint64_t Offset = W.Emit();
// Record the offset for this type.
unsigned Index = Idx.getIndex() - FirstTypeID;
if (TypeOffsets.size() == Index)
TypeOffsets.push_back(Offset);
else if (TypeOffsets.size() < Index) {
TypeOffsets.resize(Index + 1);
TypeOffsets[Index] = Offset;
} else {
llvm_unreachable("Types emitted in wrong order");
}
}
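// The bookkeeping above maintains the invariant that the type with index N
// (relative to FirstTypeID) records its bit offset in TypeOffsets[N];
// emitting a type whose slot already exists trips the unreachable as an
// out-of-order (or duplicate) emission.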
//===----------------------------------------------------------------------===//
// Declaration Serialization
//===----------------------------------------------------------------------===//
/// \brief Write the block containing all of the declaration IDs
/// lexically declared within the given DeclContext.
///
/// \returns the offset of the DECL_CONTEXT_LEXICAL block within the
/// bitstream, or 0 if no block was written.
uint64_t ASTWriter::WriteDeclContextLexicalBlock(ASTContext &Context,
DeclContext *DC) {
if (DC->decls_empty())
return 0;
uint64_t Offset = Stream.GetCurrentBitNo();
SmallVector<uint32_t, 128> KindDeclPairs;
for (const auto *D : DC->decls()) {
KindDeclPairs.push_back(D->getKind());
KindDeclPairs.push_back(GetDeclRef(D));
}
++NumLexicalDeclContexts;
RecordData::value_type Record[] = {DECL_CONTEXT_LEXICAL};
Stream.EmitRecordWithBlob(DeclContextLexicalAbbrev, Record,
bytes(KindDeclPairs));
return Offset;
}
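// The DECL_CONTEXT_LEXICAL blob is just the raw bytes of the interleaved
// (kind, decl ID) pairs. For illustration, a context holding one function and
// one variable serializes as four uint32_t values:
//   [ Decl::Function, <function's decl ID>, Decl::Var, <variable's decl ID> ]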
void ASTWriter::WriteTypeDeclOffsets() {
using namespace llvm;
// Write the type offsets array
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block
unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(),
FirstTypeID - NUM_PREDEF_TYPE_IDS};
Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets));
}
// Write the declaration offsets array
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(DECL_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of declarations
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base decl ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // declarations block
unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {DECL_OFFSET, DeclOffsets.size(),
FirstDeclID - NUM_PREDEF_DECL_IDS};
Stream.EmitRecordWithBlob(DeclOffsetAbbrev, Record, bytes(DeclOffsets));
}
}
void ASTWriter::WriteFileDeclIDsMap() {
using namespace llvm;
SmallVector<std::pair<FileID, DeclIDInFileInfo *>, 64> SortedFileDeclIDs(
FileDeclIDs.begin(), FileDeclIDs.end());
std::sort(SortedFileDeclIDs.begin(), SortedFileDeclIDs.end(),
llvm::less_first());
// Join the vectors of DeclIDs from all files.
SmallVector<DeclID, 256> FileGroupedDeclIDs;
for (auto &FileDeclEntry : SortedFileDeclIDs) {
DeclIDInFileInfo &Info = *FileDeclEntry.second;
Info.FirstDeclIndex = FileGroupedDeclIDs.size();
for (auto &LocDeclEntry : Info.DeclIDs)
FileGroupedDeclIDs.push_back(LocDeclEntry.second);
}
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(FILE_SORTED_DECLS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {FILE_SORTED_DECLS,
FileGroupedDeclIDs.size()};
Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
}
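// FileGroupedDeclIDs is one flat array covering every file; each file's
// DeclIDInFileInfo keeps the start of its own run in FirstDeclIndex, so the
// per-file slices can be recovered from the single FILE_SORTED_DECLS blob.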
void ASTWriter::WriteComments() {
Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3);
ArrayRef<RawComment *> RawComments = Context->Comments.getComments();
RecordData Record;
for (const auto *I : RawComments) {
Record.clear();
AddSourceRange(I->getSourceRange(), Record);
Record.push_back(I->getKind());
Record.push_back(I->isTrailingComment());
Record.push_back(I->isAlmostTrailingComment());
Stream.EmitRecord(COMMENTS_RAW_COMMENT, Record);
}
Stream.ExitBlock();
}
//===----------------------------------------------------------------------===//
// Global Method Pool and Selector Serialization
//===----------------------------------------------------------------------===//
namespace {
// Trait used for the on-disk hash table used in the method pool.
class ASTMethodPoolTrait {
ASTWriter &Writer;
public:
typedef Selector key_type;
typedef key_type key_type_ref;
struct data_type {
SelectorID ID;
ObjCMethodList Instance, Factory;
};
typedef const data_type& data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
explicit ASTMethodPoolTrait(ASTWriter &Writer) : Writer(Writer) { }
static hash_value_type ComputeHash(Selector Sel) {
return serialization::ComputeHash(Sel);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, Selector Sel,
data_type_ref Methods) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4);
LE.write<uint16_t>(KeyLen);
unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
DataLen += 4;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
DataLen += 4;
LE.write<uint16_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, Selector Sel, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell();
assert((Start >> 32) == 0 && "Selector key offset too large");
Writer.SetSelectorOffset(Sel, Start);
unsigned N = Sel.getNumArgs();
LE.write<uint16_t>(N);
if (N == 0)
N = 1;
for (unsigned I = 0; I != N; ++I)
LE.write<uint32_t>(
Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I)));
}
void EmitData(raw_ostream& Out, key_type_ref,
data_type_ref Methods, unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
LE.write<uint32_t>(Methods.ID);
unsigned NumInstanceMethods = 0;
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
++NumInstanceMethods;
unsigned NumFactoryMethods = 0;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
++NumFactoryMethods;
unsigned InstanceBits = Methods.Instance.getBits();
assert(InstanceBits < 4);
unsigned InstanceHasMoreThanOneDeclBit =
Methods.Instance.hasMoreThanOneDecl();
unsigned FullInstanceBits = (NumInstanceMethods << 3) |
(InstanceHasMoreThanOneDeclBit << 2) |
InstanceBits;
unsigned FactoryBits = Methods.Factory.getBits();
assert(FactoryBits < 4);
unsigned FactoryHasMoreThanOneDeclBit =
Methods.Factory.hasMoreThanOneDecl();
unsigned FullFactoryBits = (NumFactoryMethods << 3) |
(FactoryHasMoreThanOneDeclBit << 2) |
FactoryBits;
LE.write<uint16_t>(FullInstanceBits);
LE.write<uint16_t>(FullFactoryBits);
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
assert(Out.tell() - Start == DataLen && "Data length is wrong");
}
};
} // end anonymous namespace
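// For illustration, the packing in EmitData above: with, say, 5 visible
// instance methods, hasMoreThanOneDecl() == true, and getBits() == 2, the
// emitted FullInstanceBits value is
//   (5 << 3) | (1 << 2) | 2 == 40 | 4 | 2 == 46
// and the reader can recover the count (46 >> 3 == 5), the
// more-than-one-decl flag ((46 >> 2) & 1 == 1), and the raw list bits
// (46 & 3 == 2) from the single uint16_t.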
/// \brief Write ObjC data: selectors and the method pool.
///
/// The method pool contains both instance and factory methods, stored
/// in an on-disk hash table indexed by the selector. The hash table also
/// contains an empty entry for every other selector known to Sema.
void ASTWriter::WriteSelectors(Sema &SemaRef) {
using namespace llvm;
// Do we have to do anything at all?
if (SemaRef.MethodPool.empty() && SelectorIDs.empty())
return;
unsigned NumTableEntries = 0;
// Create and write out the blob that contains selectors and the method pool.
{
llvm::OnDiskChainedHashTableGenerator<ASTMethodPoolTrait> Generator;
ASTMethodPoolTrait Trait(*this);
// Create the on-disk hash table representation. We walk through every
// selector we've seen and look it up in the method pool.
SelectorOffsets.resize(NextSelectorID - FirstSelectorID);
for (auto &SelectorAndID : SelectorIDs) {
Selector S = SelectorAndID.first;
SelectorID ID = SelectorAndID.second;
Sema::GlobalMethodPool::iterator F = SemaRef.MethodPool.find(S);
ASTMethodPoolTrait::data_type Data = {
ID,
ObjCMethodList(),
ObjCMethodList()
};
if (F != SemaRef.MethodPool.end()) {
Data.Instance = F->second.first;
Data.Factory = F->second.second;
}
// Only write this selector if it's not in an existing AST or something
// changed.
if (Chain && ID < FirstSelectorID) {
// Selector already exists. Did it change?
bool changed = false;
for (ObjCMethodList *M = &Data.Instance;
!changed && M && M->getMethod(); M = M->getNext()) {
if (!M->getMethod()->isFromASTFile())
changed = true;
}
for (ObjCMethodList *M = &Data.Factory; !changed && M && M->getMethod();
M = M->getNext()) {
if (!M->getMethod()->isFromASTFile())
changed = true;
}
if (!changed)
continue;
} else if (Data.Instance.getMethod() || Data.Factory.getMethod()) {
// A new method pool entry.
++NumTableEntries;
}
Generator.insert(S, Data, Trait);
}
// Create the on-disk hash table in a buffer.
SmallString<4096> MethodPool;
uint32_t BucketOffset;
{
using namespace llvm::support;
ASTMethodPoolTrait Trait(*this);
llvm::raw_svector_ostream Out(MethodPool);
// Make sure that no bucket is at offset 0
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, Trait);
}
// Create a blob abbreviation
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(METHOD_POOL));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned MethodPoolAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the method pool
{
RecordData::value_type Record[] = {METHOD_POOL, BucketOffset,
NumTableEntries};
Stream.EmitRecordWithBlob(MethodPoolAbbrev, Record, MethodPool);
}
// Create a blob abbreviation for the selector table offsets.
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SELECTOR_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the selector offsets table.
{
RecordData::value_type Record[] = {
SELECTOR_OFFSETS, SelectorOffsets.size(),
FirstSelectorID - NUM_PREDEF_SELECTOR_IDS};
Stream.EmitRecordWithBlob(SelectorOffsetAbbrev, Record,
bytes(SelectorOffsets));
}
}
}
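// Net layout: the METHOD_POOL blob starts with a dummy word (so that no
// bucket lands at offset 0) followed by the on-disk hash table, and
// SELECTOR_OFFSETS maps each locally assigned SelectorID to the key offset
// that ASTMethodPoolTrait::EmitKey recorded via SetSelectorOffset.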
/// \brief Write the selectors referenced in @selector expression into AST file.
void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
using namespace llvm;
if (SemaRef.ReferencedSelectors.empty())
return;
RecordData Record;
ASTRecordWriter Writer(*this, Record);
// Note: this writes out all references even for a dependent AST. But it is
// very tricky to fix, and given that @selector shouldn't really appear in
// headers, probably not worth it. It's not a correctness issue.
for (auto &SelectorAndLocation : SemaRef.ReferencedSelectors) {
Selector Sel = SelectorAndLocation.first;
SourceLocation Loc = SelectorAndLocation.second;
Writer.AddSelectorRef(Sel);
Writer.AddSourceLocation(Loc);
}
Writer.Emit(REFERENCED_SELECTOR_POOL);
}
//===----------------------------------------------------------------------===//
// Identifier Table Serialization
//===----------------------------------------------------------------------===//
/// Determine the declaration that should be put into the name lookup table to
/// represent the given declaration in this module. This is usually D itself,
/// but if D was imported and merged into a local declaration, we want the most
/// recent local declaration instead. The chosen declaration will be the most
/// recent declaration in any module that imports this one.
static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts,
NamedDecl *D) {
if (!LangOpts.Modules || !D->isFromASTFile())
return D;
if (Decl *Redecl = D->getPreviousDecl()) {
// For Redeclarable decls, a prior declaration might be local.
for (; Redecl; Redecl = Redecl->getPreviousDecl()) {
// If we find a local decl, we're done.
if (!Redecl->isFromASTFile()) {
// Exception: in very rare cases (for injected-class-names), not all
// redeclarations are in the same semantic context. Skip ones in a
// different context. They don't go in this lookup table at all.
if (!Redecl->getDeclContext()->getRedeclContext()->Equals(
D->getDeclContext()->getRedeclContext()))
continue;
return cast<NamedDecl>(Redecl);
}
// If we find a decl from a (chained-)PCH stop since we won't find a
// local one.
if (Redecl->getOwningModuleID() == 0)
break;
}
} else if (Decl *First = D->getCanonicalDecl()) {
// For Mergeable decls, the first decl might be local.
if (!First->isFromASTFile())
return cast<NamedDecl>(First);
}
// All declarations are imported. Our most recent declaration will also be
// the most recent one in anyone who imports us.
return D;
}
namespace {
class ASTIdentifierTableTrait {
ASTWriter &Writer;
Preprocessor &PP;
IdentifierResolver &IdResolver;
bool IsModule;
bool NeedDecls;
ASTWriter::RecordData *InterestingIdentifierOffsets;
/// \brief Determines whether this is an "interesting" identifier that needs a
/// full IdentifierInfo structure written into the hash table. Notably, this
/// doesn't check whether the name has macros defined; use PublicMacroIterator
/// to check that.
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
if (MacroOffset ||
II->isPoisoned() ||
(IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
II->hasRevertedTokenIDToIdentifier() ||
(NeedDecls && II->getFETokenInfo<void>()))
return true;
return false;
}
public:
typedef IdentifierInfo* key_type;
typedef key_type key_type_ref;
typedef IdentID data_type;
typedef data_type data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
IdentifierResolver &IdResolver, bool IsModule,
ASTWriter::RecordData *InterestingIdentifierOffsets)
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
bool needDecls() const { return NeedDecls; }
static hash_value_type ComputeHash(const IdentifierInfo* II) {
return llvm::HashString(II->getName());
}
bool isInterestingIdentifier(const IdentifierInfo *II) {
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
return isInterestingIdentifier(II, MacroOffset);
}
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
return isInterestingIdentifier(II, 0);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
unsigned KeyLen = II->getLength() + 1;
unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
if (isInterestingIdentifier(II, MacroOffset)) {
DataLen += 2; // 2 bytes for builtin ID
DataLen += 2; // 2 bytes for flags
if (MacroOffset)
DataLen += 4; // MacroDirectives offset.
if (NeedDecls) {
for (IdentifierResolver::iterator D = IdResolver.begin(II),
DEnd = IdResolver.end();
D != DEnd; ++D)
DataLen += 4;
}
}
using namespace llvm::support;
endian::Writer<little> LE(Out);
assert((uint16_t)DataLen == DataLen && (uint16_t)KeyLen == KeyLen);
LE.write<uint16_t>(DataLen);
// We emit the key length after the data length so that every
// string is preceded by a 16-bit length. This matches the PTH
// format for storing identifiers.
LE.write<uint16_t>(KeyLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, const IdentifierInfo* II,
unsigned KeyLen) {
// Record the location of the key data. This is used when generating
// the mapping from persistent IDs to strings.
Writer.SetIdentifierOffset(II, Out.tell());
// Emit the offset of the key/data length information to the interesting
// identifiers table if necessary.
if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
InterestingIdentifierOffsets->push_back(Out.tell() - 4);
Out.write(II->getNameStart(), KeyLen);
}
void EmitData(raw_ostream& Out, IdentifierInfo* II,
IdentID ID, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
if (!isInterestingIdentifier(II, MacroOffset)) {
LE.write<uint32_t>(ID << 1);
return;
}
LE.write<uint32_t>((ID << 1) | 0x01);
uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID();
assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader.");
LE.write<uint16_t>(Bits);
Bits = 0;
bool HadMacroDefinition = MacroOffset != 0;
Bits = (Bits << 1) | unsigned(HadMacroDefinition);
Bits = (Bits << 1) | unsigned(II->isExtensionToken());
Bits = (Bits << 1) | unsigned(II->isPoisoned());
Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
LE.write<uint16_t>(Bits);
if (HadMacroDefinition)
LE.write<uint32_t>(MacroOffset);
if (NeedDecls) {
// Emit the declaration IDs in reverse order, because the
// IdentifierResolver provides the declarations as they would be
// visible (e.g., the function "stat" would come before the struct
// "stat"), but the ASTReader adds declarations to the end of the list
// (so we need to see the struct "stat" before the function "stat").
// Only emit declarations that aren't from a chained PCH, though.
SmallVector<NamedDecl *, 16> Decls(IdResolver.begin(II),
IdResolver.end());
for (SmallVectorImpl<NamedDecl *>::reverse_iterator D = Decls.rbegin(),
DEnd = Decls.rend();
D != DEnd; ++D)
LE.write<uint32_t>(
Writer.getDeclID(getDeclForLocalLookup(PP.getLangOpts(), *D)));
}
}
};
} // end anonymous namespace
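// For illustration, the flag word written in EmitData above packs six bits,
// most significant first: HadMacroDefinition, isExtensionToken, isPoisoned,
// hasRevertedBuiltin, hasRevertedTokenIDToIdentifier, and
// isCPlusPlusOperatorKeyword. A poisoned identifier that also has a macro
// definition therefore serializes its flags as 0b101000 == 40.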
/// \brief Write the identifier table into the AST file.
///
/// The identifier table consists of a blob containing string data
/// (the actual identifiers themselves) and a separate "offsets" index
/// that maps identifier IDs to locations within the blob.
void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
IdentifierResolver &IdResolver,
bool IsModule) {
using namespace llvm;
RecordData InterestingIdents;
// Create and write out the blob that contains the identifier
// strings.
{
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
ASTIdentifierTableTrait Trait(
*this, PP, IdResolver, IsModule,
(getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
// Look for any identifiers that were named while processing the
// headers, but are otherwise not needed. We add these to the hash
// table to enable checking of the predefines buffer in the case
// where the user adds new macro definitions when building the AST
// file.
SmallVector<const IdentifierInfo *, 128> IIs;
for (const auto &ID : PP.getIdentifierTable())
IIs.push_back(ID.second);
// Sort the identifiers lexicographically before getting their references so
// that their order is stable.
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
for (const IdentifierInfo *II : IIs)
if (Trait.isInterestingNonMacroIdentifier(II))
getIdentifierRef(II);
// Create the on-disk hash table representation. We only store offsets
// for identifiers that appear here for the first time.
IdentifierOffsets.resize(NextIdentID - FirstIdentID);
for (auto IdentIDPair : IdentifierIDs) {
auto *II = const_cast<IdentifierInfo *>(IdentIDPair.first);
IdentID ID = IdentIDPair.second;
assert(II && "NULL identifier in identifier table");
// Write out identifiers if either the ID is local or the identifier has
// changed since it was loaded.
if (ID >= FirstIdentID || !Chain || !II->isFromAST()
|| II->hasChangedSinceDeserialization() ||
(Trait.needDecls() &&
II->hasFETokenInfoChangedSinceDeserialization()))
Generator.insert(II, ID, Trait);
}
// Create the on-disk hash table in a buffer.
SmallString<4096> IdentifierTable;
uint32_t BucketOffset;
{
using namespace llvm::support;
llvm::raw_svector_ostream Out(IdentifierTable);
// Make sure that no bucket is at offset 0
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, Trait);
}
// Create a blob abbreviation
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_TABLE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the identifier table
RecordData::value_type Record[] = {IDENTIFIER_TABLE, BucketOffset};
Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
}
// Write the offsets table for identifier IDs.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
#ifndef NDEBUG
for (unsigned I = 0, N = IdentifierOffsets.size(); I != N; ++I)
assert(IdentifierOffsets[I] && "Missing identifier offset?");
#endif
RecordData::value_type Record[] = {IDENTIFIER_OFFSET,
IdentifierOffsets.size(),
FirstIdentID - NUM_PREDEF_IDENT_IDS};
Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
bytes(IdentifierOffsets));
// In C++, write the list of interesting identifiers (those that are
// defined as macros, poisoned, or similar unusual things).
if (!InterestingIdents.empty())
Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
}
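// The resulting layout, roughly: an IDENTIFIER_TABLE record whose blob holds
// the chained hash table (with BucketOffset locating the bucket array), an
// IDENTIFIER_OFFSET record mapping each local IdentID to a byte offset in
// that blob, and, for C++ modules, an optional INTERESTING_IDENTIFIERS list.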
//===----------------------------------------------------------------------===//
// DeclContext's Name Lookup Table Serialization
//===----------------------------------------------------------------------===//
namespace {
// Trait used for the on-disk hash table used in the method pool.
class ASTDeclContextNameLookupTrait {
ASTWriter &Writer;
llvm::SmallVector<DeclID, 64> DeclIDs;
public:
typedef DeclarationNameKey key_type;
typedef key_type key_type_ref;
/// A start and end index into DeclIDs, representing a sequence of decls.
typedef std::pair<unsigned, unsigned> data_type;
typedef const data_type& data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
explicit ASTDeclContextNameLookupTrait(ASTWriter &Writer) : Writer(Writer) { }
template<typename Coll>
data_type getData(const Coll &Decls) {
unsigned Start = DeclIDs.size();
for (NamedDecl *D : Decls) {
DeclIDs.push_back(
Writer.GetDeclRef(getDeclForLocalLookup(Writer.getLangOpts(), D)));
}
return std::make_pair(Start, DeclIDs.size());
}
data_type ImportData(const reader::ASTDeclContextNameLookupTrait::data_type &FromReader) {
unsigned Start = DeclIDs.size();
for (auto ID : FromReader)
DeclIDs.push_back(ID);
return std::make_pair(Start, DeclIDs.size());
}
static bool EqualKey(key_type_ref a, key_type_ref b) {
return a == b;
}
hash_value_type ComputeHash(DeclarationNameKey Name) {
return Name.getHash();
}
void EmitFileRef(raw_ostream &Out, ModuleFile *F) const {
assert(Writer.hasChain() &&
"have reference to loaded module file but no chain?");
using namespace llvm::support;
endian::Writer<little>(Out)
.write<uint32_t>(Writer.getChain()->getModuleFileID(F));
}
std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &Out,
DeclarationNameKey Name,
data_type_ref Lookup) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
unsigned KeyLen = 1;
switch (Name.getKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
KeyLen += 4;
break;
case DeclarationName::CXXOperatorName:
KeyLen += 1;
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
break;
}
LE.write<uint16_t>(KeyLen);
// 4 bytes for each DeclID.
unsigned DataLen = 4 * (Lookup.second - Lookup.first);
assert(uint16_t(DataLen) == DataLen &&
"too many decls for serialized lookup result");
LE.write<uint16_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream &Out, DeclarationNameKey Name, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
LE.write<uint8_t>(Name.getKind());
switch (Name.getKind()) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier()));
return;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
LE.write<uint32_t>(Writer.getSelectorRef(Name.getSelector()));
return;
case DeclarationName::CXXOperatorName:
assert(Name.getOperatorKind() < NUM_OVERLOADED_OPERATORS &&
"Invalid operator?");
LE.write<uint8_t>(Name.getOperatorKind());
return;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
return;
}
llvm_unreachable("Invalid name kind?");
}
void EmitData(raw_ostream &Out, key_type_ref, data_type Lookup,
unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I)
LE.write<uint32_t>(DeclIDs[I]);
assert(Out.tell() - Start == DataLen && "Data length is wrong");
}
};
} // end anonymous namespace
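// Key encoding recap from EmitKey above: one byte of name kind, then a 4-byte
// identifier or selector reference for identifier-like and Objective-C names,
// a single byte of operator kind for C++ operators, and nothing extra for
// constructor, destructor, conversion-function, and using-directive names,
// matching the lengths computed in EmitKeyDataLength.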
bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result,
DeclContext *DC) {
return Result.hasExternalDecls() && DC->NeedToReconcileExternalVisibleStorage;
}
bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result,
DeclContext *DC) {
for (auto *D : Result.getLookupResult())
if (!getDeclForLocalLookup(getLangOpts(), D)->isFromASTFile())
return false;
return true;
}
void
ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
llvm::SmallVectorImpl<char> &LookupTable) {
assert(!ConstDC->HasLazyLocalLexicalLookups &&
!ConstDC->HasLazyExternalLexicalLookups &&
"must call buildLookups first");
// FIXME: We need to build the lookups table, which is logically const.
auto *DC = const_cast<DeclContext*>(ConstDC);
assert(DC == DC->getPrimaryContext() && "only primary DC has lookup table");
// Create the on-disk hash table representation.
MultiOnDiskHashTableGenerator<reader::ASTDeclContextNameLookupTrait,
ASTDeclContextNameLookupTrait> Generator;
ASTDeclContextNameLookupTrait Trait(*this);
// The first step is to collect the declaration names which we need to
// serialize into the name lookup table, and to collect them in a stable
// order.
SmallVector<DeclarationName, 16> Names;
// We also build up small sets of the constructor and conversion function
// names which are visible.
llvm::SmallSet<DeclarationName, 8> ConstructorNameSet, ConversionNameSet;
for (auto &Lookup : *DC->buildLookup()) {
auto &Name = Lookup.first;
auto &Result = Lookup.second;
// If there are no local declarations in our lookup result, we
// don't need to write an entry for the name at all. If we can't
// write out a lookup set without performing more deserialization,
// just skip this entry.
if (isLookupResultExternal(Result, DC) &&
isLookupResultEntirelyExternal(Result, DC))
continue;
// We also skip empty results. If any of the results could be external and
// the currently available results are empty, then all of the results are
// external and we skip the entry above. So the only way we get here with
// empty results is when none of the results could have been external *and*
// the lookup genuinely produced nothing.
//
// FIXME: While we might want to start emitting on-disk entries for negative
// lookups into a decl context as an optimization, today we *have* to skip
// them because there are names with empty lookup results in decl contexts
// which we can't emit in any stable ordering: we lookup constructors and
// conversion functions in the enclosing namespace scope creating empty
// results for them. This is almost certainly a bug in Clang's name lookup,
// but that is likely to be hard or impossible to fix and so we tolerate it
// here by omitting lookups with empty results.
if (Lookup.second.getLookupResult().empty())
continue;
switch (Lookup.first.getNameKind()) {
default:
Names.push_back(Lookup.first);
break;
case DeclarationName::CXXConstructorName:
assert(isa<CXXRecordDecl>(DC) &&
"Cannot have a constructor name outside of a class!");
ConstructorNameSet.insert(Name);
break;
case DeclarationName::CXXConversionFunctionName:
assert(isa<CXXRecordDecl>(DC) &&
"Cannot have a conversion function name outside of a class!");
ConversionNameSet.insert(Name);
break;
}
}
// Sort the names into a stable order.
std::sort(Names.begin(), Names.end());
if (auto *D = dyn_cast<CXXRecordDecl>(DC)) {
// We need to establish an ordering of constructor and conversion function
// names, and they don't have an intrinsic ordering.
// First we try the easy case by forming the current context's constructor
// name and adding that name first. This is a very useful optimization to
// avoid walking the lexical declarations in many cases, and it also
// handles the only case where a constructor name can come from some other
// lexical context -- when that name is an implicit constructor merged from
// another declaration in the redecl chain. Any non-implicit constructor or
// conversion function which doesn't occur in all the lexical contexts
// would be an ODR violation.
auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName(
Context->getCanonicalType(Context->getRecordType(D)));
if (ConstructorNameSet.erase(ImplicitCtorName))
Names.push_back(ImplicitCtorName);
// If we still have constructors or conversion functions, we walk all the
// names in the decl and add the constructors and conversion functions
// which are visible in the order they lexically occur within the context.
if (!ConstructorNameSet.empty() || !ConversionNameSet.empty())
for (Decl *ChildD : cast<CXXRecordDecl>(DC)->decls())
if (auto *ChildND = dyn_cast<NamedDecl>(ChildD)) {
auto Name = ChildND->getDeclName();
switch (Name.getNameKind()) {
default:
continue;
case DeclarationName::CXXConstructorName:
if (ConstructorNameSet.erase(Name))
Names.push_back(Name);
break;
case DeclarationName::CXXConversionFunctionName:
if (ConversionNameSet.erase(Name))
Names.push_back(Name);
break;
}
if (ConstructorNameSet.empty() && ConversionNameSet.empty())
break;
}
assert(ConstructorNameSet.empty() && "Failed to find all of the visible "
"constructors by walking all the "
"lexical members of the context.");
assert(ConversionNameSet.empty() && "Failed to find all of the visible "
"conversion functions by walking all "
"the lexical members of the context.");
}
// Next we need to do a lookup with each name into this decl context to fully
// populate any results from external sources. We don't actually use the
// results of these lookups because we only want to use the results after all
// results have been loaded and the pointers into them will be stable.
for (auto &Name : Names)
DC->lookup(Name);
// Now we need to insert the results for each name into the hash table. For
// constructor names and conversion function names, we actually need to merge
// all of the results for them into one list of results each and insert
// those.
SmallVector<NamedDecl *, 8> ConstructorDecls;
SmallVector<NamedDecl *, 8> ConversionDecls;
// Now loop over the names, either inserting them or appending for the two
// special cases.
for (auto &Name : Names) {
DeclContext::lookup_result Result = DC->noload_lookup(Name);
switch (Name.getNameKind()) {
default:
Generator.insert(Name, Trait.getData(Result), Trait);
break;
case DeclarationName::CXXConstructorName:
ConstructorDecls.append(Result.begin(), Result.end());
break;
case DeclarationName::CXXConversionFunctionName:
ConversionDecls.append(Result.begin(), Result.end());
break;
}
}
// Handle our two special cases if we ended up having any. We arbitrarily use
// the first declaration's name here because the name itself isn't part of
// the key, only the kind of name is used.
if (!ConstructorDecls.empty())
Generator.insert(ConstructorDecls.front()->getDeclName(),
Trait.getData(ConstructorDecls), Trait);
if (!ConversionDecls.empty())
Generator.insert(ConversionDecls.front()->getDeclName(),
Trait.getData(ConversionDecls), Trait);
// Create the on-disk hash table. Also emit the existing imported and
// merged table if there is one.
auto *Lookups = Chain ? Chain->getLoadedLookupTables(DC) : nullptr;
Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
}
/// \brief Write the block containing all of the declaration IDs
/// visible from the given DeclContext.
///
/// \returns the offset of the DECL_CONTEXT_VISIBLE block within the
/// bitstream, or 0 if no block was written.
uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
DeclContext *DC) {
// If we imported a key declaration of this namespace, write the visible
// lookup results as an update record for it rather than including them
// on this declaration. We will only look at key declarations on reload.
if (isa<NamespaceDecl>(DC) && Chain &&
Chain->getKeyDeclaration(cast<Decl>(DC))->isFromASTFile()) {
// Only do this once, for the first local declaration of the namespace.
for (auto *Prev = cast<NamespaceDecl>(DC)->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl())
if (!Prev->isFromASTFile())
return 0;
// Note that we need to emit an update record for the primary context.
UpdatedDeclContexts.insert(DC->getPrimaryContext());
// Make sure all visible decls are written. They will be recorded later. We
// do this using a side data structure so we can sort the names into
// a deterministic order.
StoredDeclsMap *Map = DC->getPrimaryContext()->buildLookup();
SmallVector<std::pair<DeclarationName, DeclContext::lookup_result>, 16>
LookupResults;
if (Map) {
LookupResults.reserve(Map->size());
for (auto &Entry : *Map)
LookupResults.push_back(
std::make_pair(Entry.first, Entry.second.getLookupResult()));
}
std::sort(LookupResults.begin(), LookupResults.end(), llvm::less_first());
for (auto &NameAndResult : LookupResults) {
DeclarationName Name = NameAndResult.first;
DeclContext::lookup_result Result = NameAndResult.second;
if (Name.getNameKind() == DeclarationName::CXXConstructorName ||
Name.getNameKind() == DeclarationName::CXXConversionFunctionName) {
// We have to work around a name lookup bug here where negative lookup
// results for these names get cached in namespace lookup tables (these
// names should never be looked up in a namespace).
assert(Result.empty() && "Cannot have a constructor or conversion "
"function name in a namespace!");
continue;
}
for (NamedDecl *ND : Result)
if (!ND->isFromASTFile())
GetDeclRef(ND);
}
return 0;
}
if (DC->getPrimaryContext() != DC)
return 0;
// Skip contexts which don't support name lookup.
if (!DC->isLookupContext())
return 0;
// If not in C++, we perform name lookup for the translation unit via the
// IdentifierInfo chains, so don't bother to build a visible-declarations
// table.
if (DC->isTranslationUnit() && !Context.getLangOpts().CPlusPlus)
return 0;
// Serialize the contents of the mapping used for lookup. Note that,
// although we have two very different code paths, the serialized
// representation is the same for both cases: a declaration name,
// followed by a size, followed by references to the visible
// declarations that have that name.
uint64_t Offset = Stream.GetCurrentBitNo();
StoredDeclsMap *Map = DC->buildLookup();
if (!Map || Map->empty())
return 0;
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
GenerateNameLookupTable(DC, LookupTable);
// Write the lookup table
RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE};
Stream.EmitRecordWithBlob(DeclContextVisibleLookupAbbrev, Record,
LookupTable);
++NumVisibleDeclContexts;
return Offset;
}
/// \brief Write an UPDATE_VISIBLE block for the given context.
///
/// UPDATE_VISIBLE blocks contain the declarations that are added to an existing
/// DeclContext in a dependent AST file. As such, they only exist for the TU
/// (in C++), for namespaces, and for classes with forward-declared unscoped
/// enumeration members (in C++11).
void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) {
StoredDeclsMap *Map = DC->getLookupPtr();
if (!Map || Map->empty())
return;
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
GenerateNameLookupTable(DC, LookupTable);
// If we're updating a namespace, select a key declaration as the key for the
// update record; those are the only ones that will be checked on reload.
if (isa<NamespaceDecl>(DC))
DC = cast<DeclContext>(Chain->getKeyDeclaration(cast<Decl>(DC)));
// Write the lookup table
RecordData::value_type Record[] = {UPDATE_VISIBLE, getDeclID(cast<Decl>(DC))};
Stream.EmitRecordWithBlob(UpdateVisibleAbbrev, Record, LookupTable);
}
/// \brief Write an FP_PRAGMA_OPTIONS block for the given FPOptions.
void ASTWriter::WriteFPPragmaOptions(const FPOptions &Opts) {
RecordData::value_type Record[] = {Opts.getInt()};
Stream.EmitRecord(FP_PRAGMA_OPTIONS, Record);
}
/// \brief Write an OPENCL_EXTENSIONS block for the given OpenCLOptions.
void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
const OpenCLOptions &Opts = SemaRef.getOpenCLOptions();
RecordData Record;
for (const auto &I : Opts.OptMap) {
AddString(I.getKey(), Record);
auto V = I.getValue();
Record.push_back(V.Supported ? 1 : 0);
Record.push_back(V.Enabled ? 1 : 0);
Record.push_back(V.Avail);
Record.push_back(V.Core);
}
Stream.EmitRecord(OPENCL_EXTENSIONS, Record);
}
void ASTWriter::WriteOpenCLExtensionTypes(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
RecordData Record;
for (const auto &I : SemaRef.OpenCLTypeExtMap) {
Record.push_back(
static_cast<unsigned>(getTypeID(I.first->getCanonicalTypeInternal())));
Record.push_back(I.second.size());
for (auto Ext : I.second)
AddString(Ext, Record);
}
Stream.EmitRecord(OPENCL_EXTENSION_TYPES, Record);
}
void ASTWriter::WriteOpenCLExtensionDecls(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
RecordData Record;
for (const auto &I : SemaRef.OpenCLDeclExtMap) {
Record.push_back(getDeclID(I.first));
Record.push_back(static_cast<unsigned>(I.second.size()));
for (auto Ext : I.second)
AddString(Ext, Record);
}
Stream.EmitRecord(OPENCL_EXTENSION_DECLS, Record);
}
void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) {
if (SemaRef.ForceCUDAHostDeviceDepth > 0) {
RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth};
Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record);
}
}
void ASTWriter::WriteObjCCategories() {
SmallVector<ObjCCategoriesInfo, 2> CategoriesMap;
RecordData Categories;
for (unsigned I = 0, N = ObjCClassesWithCategories.size(); I != N; ++I) {
unsigned Size = 0;
unsigned StartIndex = Categories.size();
ObjCInterfaceDecl *Class = ObjCClassesWithCategories[I];
// Allocate space for the size.
Categories.push_back(0);
// Add the categories.
for (ObjCInterfaceDecl::known_categories_iterator
Cat = Class->known_categories_begin(),
CatEnd = Class->known_categories_end();
Cat != CatEnd; ++Cat, ++Size) {
assert(getDeclID(*Cat) != 0 && "Bogus category");
AddDeclRef(*Cat, Categories);
}
// Update the size.
Categories[StartIndex] = Size;
// Record this interface -> category map.
ObjCCategoriesInfo CatInfo = { getDeclID(Class), StartIndex };
CategoriesMap.push_back(CatInfo);
}
// Sort the categories map by the definition ID, since the reader will be
// performing binary searches on this information.
llvm::array_pod_sort(CategoriesMap.begin(), CategoriesMap.end());
// Emit the categories map.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(OBJC_CATEGORIES_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of entries
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {OBJC_CATEGORIES_MAP, CategoriesMap.size()};
Stream.EmitRecordWithBlob(AbbrevID, Record,
reinterpret_cast<char *>(CategoriesMap.data()),
CategoriesMap.size() * sizeof(ObjCCategoriesInfo));
// Emit the category lists.
Stream.EmitRecord(OBJC_CATEGORIES, Categories);
}
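// For illustration (hypothetical decl IDs): if class A (ID 10) has two known
// categories and class B (ID 7) has one, Categories becomes
//   [ 2, <A's cat 1>, <A's cat 2>, 1, <B's cat> ]
// and the sorted OBJC_CATEGORIES_MAP blob holds {7, 3} then {10, 0}, so the
// reader can binary-search by class ID and index straight into Categories.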
void ASTWriter::WriteLateParsedTemplates(Sema &SemaRef) {
Sema::LateParsedTemplateMapT &LPTMap = SemaRef.LateParsedTemplateMap;
if (LPTMap.empty())
return;
RecordData Record;
for (auto &LPTMapEntry : LPTMap) {
const FunctionDecl *FD = LPTMapEntry.first;
LateParsedTemplate &LPT = *LPTMapEntry.second;
AddDeclRef(FD, Record);
AddDeclRef(LPT.D, Record);
Record.push_back(LPT.Toks.size());
for (const auto &Tok : LPT.Toks) {
AddToken(Tok, Record);
}
}
Stream.EmitRecord(LATE_PARSED_TEMPLATE, Record);
}
/// \brief Write the state of 'pragma clang optimize' at the end of the module.
void ASTWriter::WriteOptimizePragmaOptions(Sema &SemaRef) {
RecordData Record;
SourceLocation PragmaLoc = SemaRef.getOptimizeOffPragmaLocation();
AddSourceLocation(PragmaLoc, Record);
Stream.EmitRecord(OPTIMIZE_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma ms_struct' at the end of the module.
void ASTWriter::WriteMSStructPragmaOptions(Sema &SemaRef) {
RecordData Record;
Record.push_back(SemaRef.MSStructPragmaOn ? PMSST_ON : PMSST_OFF);
Stream.EmitRecord(MSSTRUCT_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma pointers_to_members' at the end of the
/// module.
void ASTWriter::WriteMSPointersToMembersPragmaOptions(Sema &SemaRef) {
RecordData Record;
Record.push_back(SemaRef.MSPointerToMemberRepresentationMethod);
AddSourceLocation(SemaRef.ImplicitMSInheritanceAttrLoc, Record);
Stream.EmitRecord(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma pack' at the end of the module.
void ASTWriter::WritePackPragmaOptions(Sema &SemaRef) {
// Don't serialize pragma pack state for modules, since it should only take
// effect on a per-submodule basis.
if (WritingModule)
return;
RecordData Record;
Record.push_back(SemaRef.PackStack.CurrentValue);
AddSourceLocation(SemaRef.PackStack.CurrentPragmaLocation, Record);
Record.push_back(SemaRef.PackStack.Stack.size());
for (const auto &StackEntry : SemaRef.PackStack.Stack) {
Record.push_back(StackEntry.Value);
AddSourceLocation(StackEntry.PragmaLocation, Record);
AddString(StackEntry.StackSlotLabel, Record);
}
Stream.EmitRecord(PACK_PRAGMA_OPTIONS, Record);
}
void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
ModuleFileExtensionWriter &Writer) {
// Enter the extension block.
Stream.EnterSubblock(EXTENSION_BLOCK_ID, 4);
// Emit the metadata record abbreviation.
auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(EXTENSION_METADATA));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
// Emit the metadata record.
RecordData Record;
auto Metadata = Writer.getExtension()->getExtensionMetadata();
Record.push_back(EXTENSION_METADATA);
Record.push_back(Metadata.MajorVersion);
Record.push_back(Metadata.MinorVersion);
Record.push_back(Metadata.BlockName.size());
Record.push_back(Metadata.UserInfo.size());
SmallString<64> Buffer;
Buffer += Metadata.BlockName;
Buffer += Metadata.UserInfo;
Stream.EmitRecordWithBlob(Abbrev, Record, Buffer);
// Emit the contents of the extension block.
Writer.writeExtensionContents(SemaRef, Stream);
// Exit the extension block.
Stream.ExitBlock();
}
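// For illustration: the metadata blob is BlockName immediately followed by
// UserInfo; the two sizes stored in the record are what let the reader split
// the concatenation back apart.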
//===----------------------------------------------------------------------===//
// General Serialization Routines
//===----------------------------------------------------------------------===//
/// \brief Emit the list of attributes to the specified record.
void ASTRecordWriter::AddAttributes(ArrayRef<const Attr *> Attrs) {
auto &Record = *this;
Record.push_back(Attrs.size());
for (const auto *A : Attrs) {
Record.push_back(A->getKind()); // FIXME: stable encoding, target attrs
Record.AddSourceRange(A->getRange());
#include "clang/Serialization/AttrPCHWrite.inc"
}
}
void ASTWriter::AddToken(const Token &Tok, RecordDataImpl &Record) {
AddSourceLocation(Tok.getLocation(), Record);
Record.push_back(Tok.getLength());
// FIXME: When reading literal tokens, reconstruct the literal pointer
// if it is needed.
AddIdentifierRef(Tok.getIdentifierInfo(), Record);
// FIXME: Should translate token kind to a stable encoding.
Record.push_back(Tok.getKind());
// FIXME: Should translate token flags to a stable encoding.
Record.push_back(Tok.getFlags());
}
void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
Record.push_back(Str.size());
Record.insert(Record.end(), Str.begin(), Str.end());
}
bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
assert(Context && "should have context when outputting path");
bool Changed =
cleanPathForOutput(Context->getSourceManager().getFileManager(), Path);
// Remove a prefix to make the path relative, if relevant.
const char *PathBegin = Path.data();
const char *PathPtr =
adjustFilenameForRelocatableAST(PathBegin, BaseDirectory);
if (PathPtr != PathBegin) {
Path.erase(Path.begin(), Path.begin() + (PathPtr - PathBegin));
Changed = true;
}
return Changed;
}
void ASTWriter::AddPath(StringRef Path, RecordDataImpl &Record) {
SmallString<128> FilePath(Path);
PreparePathForOutput(FilePath);
AddString(FilePath, Record);
}
void ASTWriter::EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
StringRef Path) {
SmallString<128> FilePath(Path);
PreparePathForOutput(FilePath);
Stream.EmitRecordWithBlob(Abbrev, Record, FilePath);
}
void ASTWriter::AddVersionTuple(const VersionTuple &Version,
RecordDataImpl &Record) {
Record.push_back(Version.getMajor());
if (Optional<unsigned> Minor = Version.getMinor())
Record.push_back(*Minor + 1);
else
Record.push_back(0);
if (Optional<unsigned> Subminor = Version.getSubminor())
Record.push_back(*Subminor + 1);
else
Record.push_back(0);
}
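// For illustration: version "3.2.1" is encoded as [3, 3, 2] (minor and
// subminor are stored plus one so that 0 can mean "absent"), while a bare
// major version "3" is encoded as [3, 0, 0].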
/// \brief Note that the identifier II occurs at the given offset
/// within the identifier table.
void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
IdentID ID = IdentifierIDs[II];
// Only store offsets new to this AST file. Other identifier names are looked
// up earlier in the chain and thus don't need an offset.
if (ID >= FirstIdentID)
IdentifierOffsets[ID - FirstIdentID] = Offset;
}
/// \brief Note that the selector Sel occurs at the given offset
/// within the method pool/selector table.
void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) {
unsigned ID = SelectorIDs[Sel];
assert(ID && "Unknown selector");
// Don't record offsets for selectors that are also available in a different
// file.
if (ID < FirstSelectorID)
return;
SelectorOffsets[ID - FirstSelectorID] = Offset;
}
ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
SmallVectorImpl<char> &Buffer, MemoryBufferCache &PCMCache,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool IncludeTimestamps)
: Stream(Stream), Buffer(Buffer), PCMCache(PCMCache),
IncludeTimestamps(IncludeTimestamps) {
for (const auto &Ext : Extensions) {
if (auto Writer = Ext->createExtensionWriter(*this))
ModuleFileExtensionWriters.push_back(std::move(Writer));
}
}
ASTWriter::~ASTWriter() {
llvm::DeleteContainerSeconds(FileDeclIDs);
}
const LangOptions &ASTWriter::getLangOpts() const {
assert(WritingAST && "can't determine lang opts when not writing AST");
return Context->getLangOpts();
}
time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const {
return IncludeTimestamps ? E->getModificationTime() : 0;
}
ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef,
const std::string &OutputFile,
Module *WritingModule, StringRef isysroot,
bool hasErrors) {
WritingAST = true;
ASTHasCompilerErrors = hasErrors;
// Emit the file header.
Stream.Emit((unsigned)'C', 8);
Stream.Emit((unsigned)'P', 8);
Stream.Emit((unsigned)'C', 8);
Stream.Emit((unsigned)'H', 8);
WriteBlockInfoBlock();
Context = &SemaRef.Context;
PP = &SemaRef.PP;
this->WritingModule = WritingModule;
ASTFileSignature Signature =
WriteASTCore(SemaRef, isysroot, OutputFile, WritingModule);
Context = nullptr;
PP = nullptr;
this->WritingModule = nullptr;
this->BaseDirectory.clear();
WritingAST = false;
if (SemaRef.Context.getLangOpts().ImplicitModules && WritingModule) {
// Construct MemoryBuffer and update buffer manager.
PCMCache.addBuffer(OutputFile,
llvm::MemoryBuffer::getMemBufferCopy(
StringRef(Buffer.begin(), Buffer.size())));
}
return Signature;
}
template<typename Vector>
static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec,
ASTWriter::RecordData &Record) {
for (typename Vector::iterator I = Vec.begin(nullptr, true), E = Vec.end();
I != E; ++I) {
Writer.AddDeclRef(*I, Record);
}
}
ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
const std::string &OutputFile,
Module *WritingModule) {
using namespace llvm;
bool isModule = WritingModule != nullptr;
// Make sure that the AST reader knows to finalize itself.
if (Chain)
Chain->finalizeForWriting();
ASTContext &Context = SemaRef.Context;
Preprocessor &PP = SemaRef.PP;
// Set up predefined declaration IDs.
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
if (D) {
assert(D->isCanonicalDecl() && "predefined decl is not canonical");
DeclIDs[D] = ID;
}
};
RegisterPredefDecl(Context.getTranslationUnitDecl(),
PREDEF_DECL_TRANSLATION_UNIT_ID);
RegisterPredefDecl(Context.ObjCIdDecl, PREDEF_DECL_OBJC_ID_ID);
RegisterPredefDecl(Context.ObjCSelDecl, PREDEF_DECL_OBJC_SEL_ID);
RegisterPredefDecl(Context.ObjCClassDecl, PREDEF_DECL_OBJC_CLASS_ID);
RegisterPredefDecl(Context.ObjCProtocolClassDecl,
PREDEF_DECL_OBJC_PROTOCOL_ID);
RegisterPredefDecl(Context.Int128Decl, PREDEF_DECL_INT_128_ID);
RegisterPredefDecl(Context.UInt128Decl, PREDEF_DECL_UNSIGNED_INT_128_ID);
RegisterPredefDecl(Context.ObjCInstanceTypeDecl,
PREDEF_DECL_OBJC_INSTANCETYPE_ID);
RegisterPredefDecl(Context.BuiltinVaListDecl, PREDEF_DECL_BUILTIN_VA_LIST_ID);
RegisterPredefDecl(Context.VaListTagDecl, PREDEF_DECL_VA_LIST_TAG);
RegisterPredefDecl(Context.BuiltinMSVaListDecl,
PREDEF_DECL_BUILTIN_MS_VA_LIST_ID);
RegisterPredefDecl(Context.ExternCContext, PREDEF_DECL_EXTERN_C_CONTEXT_ID);
RegisterPredefDecl(Context.MakeIntegerSeqDecl,
PREDEF_DECL_MAKE_INTEGER_SEQ_ID);
RegisterPredefDecl(Context.CFConstantStringTypeDecl,
PREDEF_DECL_CF_CONSTANT_STRING_ID);
RegisterPredefDecl(Context.CFConstantStringTagDecl,
PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID);
RegisterPredefDecl(Context.TypePackElementDecl,
PREDEF_DECL_TYPE_PACK_ELEMENT_ID);
// Build a record containing all of the tentative definitions in this file, in
// TentativeDefinitions order. Generally, this record will be empty for
// headers.
RecordData TentativeDefinitions;
AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, TentativeDefinitions);
// Build a record containing all of the file scoped decls in this file.
RecordData UnusedFileScopedDecls;
if (!isModule)
AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls,
UnusedFileScopedDecls);
// Build a record containing all of the delegating constructors we still need
// to resolve.
RecordData DelegatingCtorDecls;
if (!isModule)
AddLazyVectorDecls(*this, SemaRef.DelegatingCtorDecls, DelegatingCtorDecls);
// Write the set of weak, undeclared identifiers. We always write the
// entire table, since later PCH files in a PCH chain are only interested in
// the results at the end of the chain.
RecordData WeakUndeclaredIdentifiers;
for (auto &WeakUndeclaredIdentifier : SemaRef.WeakUndeclaredIdentifiers) {
IdentifierInfo *II = WeakUndeclaredIdentifier.first;
WeakInfo &WI = WeakUndeclaredIdentifier.second;
AddIdentifierRef(II, WeakUndeclaredIdentifiers);
AddIdentifierRef(WI.getAlias(), WeakUndeclaredIdentifiers);
AddSourceLocation(WI.getLocation(), WeakUndeclaredIdentifiers);
WeakUndeclaredIdentifiers.push_back(WI.getUsed());
}
// Build a record containing all of the ext_vector declarations.
RecordData ExtVectorDecls;
AddLazyVectorDecls(*this, SemaRef.ExtVectorDecls, ExtVectorDecls);
// Build a record containing all of the VTable uses information.
RecordData VTableUses;
if (!SemaRef.VTableUses.empty()) {
for (unsigned I = 0, N = SemaRef.VTableUses.size(); I != N; ++I) {
AddDeclRef(SemaRef.VTableUses[I].first, VTableUses);
AddSourceLocation(SemaRef.VTableUses[I].second, VTableUses);
VTableUses.push_back(SemaRef.VTablesUsed[SemaRef.VTableUses[I].first]);
}
}
// Build a record containing all of the UnusedLocalTypedefNameCandidates.
RecordData UnusedLocalTypedefNameCandidates;
for (const TypedefNameDecl *TD : SemaRef.UnusedLocalTypedefNameCandidates)
AddDeclRef(TD, UnusedLocalTypedefNameCandidates);
// Build a record containing all of the pending implicit instantiations.
RecordData PendingInstantiations;
for (const auto &I : SemaRef.PendingInstantiations) {
AddDeclRef(I.first, PendingInstantiations);
AddSourceLocation(I.second, PendingInstantiations);
}
assert(SemaRef.PendingLocalImplicitInstantiations.empty() &&
"There are local ones at end of translation unit!");
// Build a record containing some declaration references.
RecordData SemaDeclRefs;
if (SemaRef.StdNamespace || SemaRef.StdBadAlloc || SemaRef.StdAlignValT) {
AddDeclRef(SemaRef.getStdNamespace(), SemaDeclRefs);
AddDeclRef(SemaRef.getStdBadAlloc(), SemaDeclRefs);
AddDeclRef(SemaRef.getStdAlignValT(), SemaDeclRefs);
}
RecordData CUDASpecialDeclRefs;
if (Context.getcudaConfigureCallDecl()) {
AddDeclRef(Context.getcudaConfigureCallDecl(), CUDASpecialDeclRefs);
}
// Build a record containing all of the known namespaces.
RecordData KnownNamespaces;
for (const auto &I : SemaRef.KnownNamespaces) {
if (!I.second)
AddDeclRef(I.first, KnownNamespaces);
}
// Build a record of all used, undefined objects that require definitions.
RecordData UndefinedButUsed;
SmallVector<std::pair<NamedDecl *, SourceLocation>, 16> Undefined;
SemaRef.getUndefinedButUsed(Undefined);
for (const auto &I : Undefined) {
AddDeclRef(I.first, UndefinedButUsed);
AddSourceLocation(I.second, UndefinedButUsed);
}
// Build a record containing all delete-expressions that we would like to
// analyze later in AST.
RecordData DeleteExprsToAnalyze;
for (const auto &DeleteExprsInfo :
SemaRef.getMismatchingDeleteExpressions()) {
AddDeclRef(DeleteExprsInfo.first, DeleteExprsToAnalyze);
DeleteExprsToAnalyze.push_back(DeleteExprsInfo.second.size());
for (const auto &DeleteLoc : DeleteExprsInfo.second) {
AddSourceLocation(DeleteLoc.first, DeleteExprsToAnalyze);
DeleteExprsToAnalyze.push_back(DeleteLoc.second);
}
}
// Write the control block
WriteControlBlock(PP, Context, isysroot, OutputFile);
// Write the remaining AST contents.
Stream.EnterSubblock(AST_BLOCK_ID, 5);
// This is so that older clang versions, before the introduction
// of the control block, can read and reject the newer PCH format.
{
RecordData Record = {VERSION_MAJOR};
Stream.EmitRecord(METADATA_OLD_FORMAT, Record);
}
// Create a lexical update block containing all of the declarations in the
// translation unit that do not come from other AST files.
const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
SmallVector<uint32_t, 128> NewGlobalKindDeclPairs;
for (const auto *D : TU->noload_decls()) {
if (!D->isFromASTFile()) {
NewGlobalKindDeclPairs.push_back(D->getKind());
NewGlobalKindDeclPairs.push_back(GetDeclRef(D));
}
}
auto Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv));
{
RecordData::value_type Record[] = {TU_UPDATE_LEXICAL};
Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record,
bytes(NewGlobalKindDeclPairs));
}
// And a visible updates block for the translation unit.
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv));
WriteDeclContextVisibleUpdate(TU);
// If we have any extern "C" names, write out a visible update for them.
if (Context.ExternCContext)
WriteDeclContextVisibleUpdate(Context.ExternCContext);
// If the translation unit has an anonymous namespace, and we don't already
// have an update block for it, write it as an update block.
// FIXME: Why do we not do this if there's already an update block?
if (NamespaceDecl *NS = TU->getAnonymousNamespace()) {
ASTWriter::UpdateRecord &Record = DeclUpdates[TU];
if (Record.empty())
Record.push_back(DeclUpdate(UPD_CXX_ADDED_ANONYMOUS_NAMESPACE, NS));
}
// Add update records for all mangling numbers and static local numbers.
// These aren't really update records, but this is a convenient way of
// tagging this rare extra data onto the declarations.
for (const auto &Number : Context.MangleNumbers)
if (!Number.first->isFromASTFile())
DeclUpdates[Number.first].push_back(DeclUpdate(UPD_MANGLING_NUMBER,
Number.second));
for (const auto &Number : Context.StaticLocalNumbers)
if (!Number.first->isFromASTFile())
DeclUpdates[Number.first].push_back(DeclUpdate(UPD_STATIC_LOCAL_NUMBER,
Number.second));
// Make sure visible decls, added to DeclContexts previously loaded from
// an AST file, are registered for serialization. Likewise for template
// specializations added to imported templates.
for (const auto *I : DeclsToEmitEvenIfUnreferenced) {
GetDeclRef(I);
}
// Make sure all decls associated with an identifier are registered for
// serialization, if we're storing decls with identifiers.
if (!WritingModule || !getLangOpts().CPlusPlus) {
llvm::SmallVector<const IdentifierInfo*, 256> IIs;
for (const auto &ID : PP.getIdentifierTable()) {
const IdentifierInfo *II = ID.second;
if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization())
IIs.push_back(II);
}
// Sort the identifiers to visit based on their name.
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
for (const IdentifierInfo *II : IIs) {
for (IdentifierResolver::iterator D = SemaRef.IdResolver.begin(II),
DEnd = SemaRef.IdResolver.end();
D != DEnd; ++D) {
GetDeclRef(*D);
}
}
}
// For the method pool in the module, if it contains an entry for a selector,
// the entry should be complete, containing everything introduced by that
// module and all of the modules it imports. The entry may be out of date, so
// we need to pull in the new content here.
// Because updateOutOfDateSelector can itself update SelectorIDs, we copy all
// selectors out first, to be safe.
llvm::SmallVector<Selector, 256> AllSelectors;
for (auto &SelectorAndID : SelectorIDs)
AllSelectors.push_back(SelectorAndID.first);
for (auto &Selector : AllSelectors)
SemaRef.updateOutOfDateSelector(Selector);
// Form the record of special types.
RecordData SpecialTypes;
AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes);
AddTypeRef(Context.getFILEType(), SpecialTypes);
AddTypeRef(Context.getjmp_bufType(), SpecialTypes);
AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes);
AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes);
AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes);
AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes);
AddTypeRef(Context.getucontext_tType(), SpecialTypes);
if (Chain) {
// Write the mapping information describing our module dependencies and how
// each of those modules was mapped into our own offset/ID space, so that
// the reader can build the appropriate mapping to its own offset/ID space.
// The map consists solely of a blob with the following format, matching the
// writeBaseIDOrNone calls below:
// *(module-name-len:i16 module-name:len*i8
// source-location-offset:i32
// identifier-id:i32
// macro-id:i32
// preprocessed-entity-id:i32
// submodule-id:i32
// selector-id:i32
// declaration-id:i32
// type-id:i32)
//
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
SmallString<2048> Buffer;
{
llvm::raw_svector_ostream Out(Buffer);
for (ModuleFile &M : Chain->ModuleMgr) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
StringRef FileName = M.FileName;
LE.write<uint16_t>(FileName.size());
Out.write(FileName.data(), FileName.size());
// Note: if a base ID was uint max, it would not be possible to load
// another module after it or have more than one entity inside it.
uint32_t None = std::numeric_limits<uint32_t>::max();
auto writeBaseIDOrNone = [&](uint32_t BaseID, bool ShouldWrite) {
assert(BaseID < std::numeric_limits<uint32_t>::max() && "base id too high");
if (ShouldWrite)
LE.write<uint32_t>(BaseID);
else
LE.write<uint32_t>(None);
};
// These values should be unique within a chain, since they will be read
// as keys into ContinuousRangeMaps.
writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries);
writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
writeBaseIDOrNone(M.BasePreprocessedEntityID,
M.NumPreprocessedEntities);
writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules);
writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors);
writeBaseIDOrNone(M.BaseDeclID, M.LocalNumDecls);
writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes);
}
}
RecordData::value_type Record[] = {MODULE_OFFSET_MAP};
Stream.EmitRecordWithBlob(ModuleOffsetMapAbbrev, Record,
Buffer.data(), Buffer.size());
}
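// A minimal reader-side sketch for the blob emitted above (hypothetical
// helper; the real decoding lives in ASTReader). All multi-byte fields
// are little-endian, and ~0u marks a base ID that was not written.
static void parseModuleOffsetMapEntry(const unsigned char *&Cursor) {
  using namespace llvm::support;
  // module-name-len:i16 followed by the name bytes.
  uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Cursor);
  llvm::StringRef Name(reinterpret_cast<const char *>(Cursor), Len);
  Cursor += Len;
  // Eight base IDs follow, in the order written above: source location
  // offset, identifier, macro, preprocessed entity, submodule, selector,
  // declaration, and type.
  for (unsigned I = 0; I != 8; ++I)
    (void)endian::readNext<uint32_t, little, unaligned>(Cursor);
  (void)Name; // a real reader would key its remap tables by Name
}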
RecordData DeclUpdatesOffsetsRecord;
// Keep writing types, declarations, and declaration update records
// until we've emitted all of them.
Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5);
WriteTypeAbbrevs();
WriteDeclAbbrevs();
do {
WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord);
while (!DeclTypesToEmit.empty()) {
DeclOrType DOT = DeclTypesToEmit.front();
DeclTypesToEmit.pop();
if (DOT.isType())
WriteType(DOT.getType());
else
WriteDecl(Context, DOT.getDecl());
}
} while (!DeclUpdates.empty());
Stream.ExitBlock();
DoneWritingDeclsAndTypes = true;
// These things can only be done once we've written out decls and types.
WriteTypeDeclOffsets();
if (!DeclUpdatesOffsetsRecord.empty())
Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord);
WriteFileDeclIDsMap();
WriteSourceManagerBlock(Context.getSourceManager(), PP);
WriteComments();
WritePreprocessor(PP, isModule);
WriteHeaderSearch(PP.getHeaderSearchInfo());
WriteSelectors(SemaRef);
WriteReferencedSelectorsPool(SemaRef);
WriteLateParsedTemplates(SemaRef);
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
WriteFPPragmaOptions(SemaRef.getFPOptions());
WriteOpenCLExtensions(SemaRef);
WriteOpenCLExtensionTypes(SemaRef);
WriteOpenCLExtensionDecls(SemaRef);
WriteCUDAPragmas(SemaRef);
// If we're emitting a module, write out the submodule information.
if (WritingModule)
WriteSubmodules(WritingModule);
Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes);
// Write the record containing external, unnamed definitions.
if (!EagerlyDeserializedDecls.empty())
Stream.EmitRecord(EAGERLY_DESERIALIZED_DECLS, EagerlyDeserializedDecls);
if (!ModularCodegenDecls.empty())
Stream.EmitRecord(MODULAR_CODEGEN_DECLS, ModularCodegenDecls);
// Write the record containing tentative definitions.
if (!TentativeDefinitions.empty())
Stream.EmitRecord(TENTATIVE_DEFINITIONS, TentativeDefinitions);
// Write the record containing unused file scoped decls.
if (!UnusedFileScopedDecls.empty())
Stream.EmitRecord(UNUSED_FILESCOPED_DECLS, UnusedFileScopedDecls);
// Write the record containing weak undeclared identifiers.
if (!WeakUndeclaredIdentifiers.empty())
Stream.EmitRecord(WEAK_UNDECLARED_IDENTIFIERS,
WeakUndeclaredIdentifiers);
// Write the record containing ext_vector type names.
if (!ExtVectorDecls.empty())
Stream.EmitRecord(EXT_VECTOR_DECLS, ExtVectorDecls);
// Write the record containing VTable uses information.
if (!VTableUses.empty())
Stream.EmitRecord(VTABLE_USES, VTableUses);
// Write the record containing potentially unused local typedefs.
if (!UnusedLocalTypedefNameCandidates.empty())
Stream.EmitRecord(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES,
UnusedLocalTypedefNameCandidates);
// Write the record containing pending implicit instantiations.
if (!PendingInstantiations.empty())
Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations);
// Write the record containing declaration references of Sema.
if (!SemaDeclRefs.empty())
Stream.EmitRecord(SEMA_DECL_REFS, SemaDeclRefs);
// Write the record containing CUDA-specific declaration references.
if (!CUDASpecialDeclRefs.empty())
Stream.EmitRecord(CUDA_SPECIAL_DECL_REFS, CUDASpecialDeclRefs);
// Write the delegating constructors.
if (!DelegatingCtorDecls.empty())
Stream.EmitRecord(DELEGATING_CTORS, DelegatingCtorDecls);
// Write the known namespaces.
if (!KnownNamespaces.empty())
Stream.EmitRecord(KNOWN_NAMESPACES, KnownNamespaces);
// Write the undefined internal functions and variables, and inline functions.
if (!UndefinedButUsed.empty())
Stream.EmitRecord(UNDEFINED_BUT_USED, UndefinedButUsed);
if (!DeleteExprsToAnalyze.empty())
Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze);
// Write the visible updates to DeclContexts.
for (auto *DC : UpdatedDeclContexts)
WriteDeclContextVisibleUpdate(DC);
if (!WritingModule) {
// Write the submodules that were imported, if any.
struct ModuleInfo {
uint64_t ID;
Module *M;
ModuleInfo(uint64_t ID, Module *M) : ID(ID), M(M) {}
};
llvm::SmallVector<ModuleInfo, 64> Imports;
for (const auto *I : Context.local_imports()) {
assert(SubmoduleIDs.find(I->getImportedModule()) != SubmoduleIDs.end());
Imports.push_back(ModuleInfo(SubmoduleIDs[I->getImportedModule()],
I->getImportedModule()));
}
if (!Imports.empty()) {
auto Cmp = [](const ModuleInfo &A, const ModuleInfo &B) {
return A.ID < B.ID;
};
auto Eq = [](const ModuleInfo &A, const ModuleInfo &B) {
return A.ID == B.ID;
};
// Sort and deduplicate module IDs.
std::sort(Imports.begin(), Imports.end(), Cmp);
Imports.erase(std::unique(Imports.begin(), Imports.end(), Eq),
Imports.end());
RecordData ImportedModules;
for (const auto &Import : Imports) {
ImportedModules.push_back(Import.ID);
// FIXME: If the module has macros imported then later has declarations
// imported, this location won't be the right one as a location for the
// declaration imports.
AddSourceLocation(PP.getModuleImportLoc(Import.M), ImportedModules);
}
Stream.EmitRecord(IMPORTED_MODULES, ImportedModules);
}
}
WriteObjCCategories();
if(!WritingModule) {
WriteOptimizePragmaOptions(SemaRef);
WriteMSStructPragmaOptions(SemaRef);
WriteMSPointersToMembersPragmaOptions(SemaRef);
}
WritePackPragmaOptions(SemaRef);
// Some simple statistics
RecordData::value_type Record[] = {
NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts};
Stream.EmitRecord(STATISTICS, Record);
Stream.ExitBlock();
// Write the module file extension blocks.
for (const auto &ExtWriter : ModuleFileExtensionWriters)
WriteModuleFileExtension(SemaRef, *ExtWriter);
return writeUnhashedControlBlock(PP, Context);
}
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
if (DeclUpdates.empty())
return;
DeclUpdateMap LocalUpdates;
LocalUpdates.swap(DeclUpdates);
for (auto &DeclUpdate : LocalUpdates) {
const Decl *D = DeclUpdate.first;
bool HasUpdatedBody = false;
RecordData RecordData;
ASTRecordWriter Record(*this, RecordData);
for (auto &Update : DeclUpdate.second) {
DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind();
// An updated body is emitted last, so that the reader doesn't need
// to skip over the lazy body to reach statements for other records.
if (Kind == UPD_CXX_ADDED_FUNCTION_DEFINITION)
HasUpdatedBody = true;
else
Record.push_back(Kind);
switch (Kind) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER:
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE:
assert(Update.getDecl() && "no decl to add?");
Record.push_back(GetDeclRef(Update.getDecl()));
break;
case UPD_CXX_ADDED_FUNCTION_DEFINITION:
break;
case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
const VarDecl *VD = cast<VarDecl>(D);
Record.AddSourceLocation(Update.getLoc());
if (VD->getInit()) {
Record.push_back(!VD->isInitKnownICE() ? 1
: (VD->isInitICE() ? 3 : 2));
Record.AddStmt(const_cast<Expr*>(VD->getInit()));
} else {
Record.push_back(0);
}
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT:
Record.AddStmt(const_cast<Expr *>(
cast<ParmVarDecl>(Update.getDecl())->getDefaultArg()));
break;
case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER:
Record.AddStmt(
cast<FieldDecl>(Update.getDecl())->getInClassInitializer());
break;
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
UpdatedDeclContexts.insert(RD->getPrimaryContext());
Record.AddCXXDefinitionData(RD);
Record.AddOffset(WriteDeclContextLexicalBlock(
*Context, const_cast<CXXRecordDecl *>(RD)));
// This state is sometimes updated by template instantiation, when we
// switch from the specialization referring to the template declaration
// to it referring to the template definition.
if (auto *MSInfo = RD->getMemberSpecializationInfo()) {
Record.push_back(MSInfo->getTemplateSpecializationKind());
Record.AddSourceLocation(MSInfo->getPointOfInstantiation());
} else {
auto *Spec = cast<ClassTemplateSpecializationDecl>(RD);
Record.push_back(Spec->getTemplateSpecializationKind());
Record.AddSourceLocation(Spec->getPointOfInstantiation());
// The instantiation might have been resolved to a partial
// specialization. If so, record which one.
auto From = Spec->getInstantiatedFrom();
if (auto PartialSpec =
From.dyn_cast<ClassTemplatePartialSpecializationDecl*>()) {
Record.push_back(true);
Record.AddDeclRef(PartialSpec);
Record.AddTemplateArgumentList(
&Spec->getTemplateInstantiationArgs());
} else {
Record.push_back(false);
}
}
Record.push_back(RD->getTagKind());
Record.AddSourceLocation(RD->getLocation());
Record.AddSourceLocation(RD->getLocStart());
Record.AddSourceRange(RD->getBraceRange());
// Instantiation may change attributes; write them all out afresh.
Record.push_back(D->hasAttrs());
if (D->hasAttrs())
Record.AddAttributes(D->getAttrs());
// FIXME: Ensure we don't get here for explicit instantiations.
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE:
Record.AddDeclRef(Update.getDecl());
break;
case UPD_CXX_RESOLVED_EXCEPTION_SPEC:
addExceptionSpec(
cast<FunctionDecl>(D)->getType()->castAs<FunctionProtoType>(),
Record);
break;
case UPD_CXX_DEDUCED_RETURN_TYPE:
Record.push_back(GetOrCreateTypeID(Update.getType()));
break;
case UPD_DECL_MARKED_USED:
break;
case UPD_MANGLING_NUMBER:
case UPD_STATIC_LOCAL_NUMBER:
Record.push_back(Update.getNumber());
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
Record.AddSourceRange(
D->getAttr<OMPThreadPrivateDeclAttr>()->getRange());
break;
case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
Record.AddSourceRange(
D->getAttr<OMPDeclareTargetDeclAttr>()->getRange());
break;
case UPD_DECL_EXPORTED:
Record.push_back(getSubmoduleID(Update.getModule()));
break;
case UPD_ADDED_ATTR_TO_RECORD:
Record.AddAttributes(llvm::makeArrayRef(Update.getAttr()));
break;
}
}
if (HasUpdatedBody) {
const auto *Def = cast<FunctionDecl>(D);
Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION);
Record.push_back(Def->isInlined());
Record.AddSourceLocation(Def->getInnerLocStart());
Record.AddFunctionDefinition(Def);
}
OffsetsRecord.push_back(GetDeclRef(D));
OffsetsRecord.push_back(Record.Emit(DECL_UPDATES));
}
}
void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) {
uint32_t Raw = Loc.getRawEncoding();
Record.push_back((Raw << 1) | (Raw >> 31));
}
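// A minimal sketch of the inverse transform (hypothetical helper; the
// real decoding lives in ASTReader). The rotation above moves the high
// "macro expansion" bit of the raw encoding into the low bit, so common
// file locations stay small and VBR-encode compactly.
static SourceLocation readRawSourceLocation(uint32_t Stored) {
  uint32_t Raw = (Stored >> 1) | (Stored << 31); // rotate right by one
  return SourceLocation::getFromRawEncoding(Raw);
}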
void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) {
AddSourceLocation(Range.getBegin(), Record);
AddSourceLocation(Range.getEnd(), Record);
}
void ASTRecordWriter::AddAPInt(const llvm::APInt &Value) {
Record->push_back(Value.getBitWidth());
const uint64_t *Words = Value.getRawData();
Record->append(Words, Words + Value.getNumWords());
}
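// A minimal reader-side sketch of the layout emitted above (hypothetical
// helper): one record entry for the bit width, then one entry per 64-bit
// word of the value.
static llvm::APInt readAPInt(llvm::ArrayRef<uint64_t> &Record) {
  unsigned BitWidth = Record[0];
  unsigned NumWords = (BitWidth + 63) / 64; // matches APInt::getNumWords()
  llvm::APInt Value(BitWidth,
                    llvm::makeArrayRef(Record.data() + 1, NumWords));
  Record = Record.drop_front(1 + NumWords); // consume what we read
  return Value;
}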
void ASTRecordWriter::AddAPSInt(const llvm::APSInt &Value) {
Record->push_back(Value.isUnsigned());
AddAPInt(Value);
}
void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) {
AddAPInt(Value.bitcastToAPInt());
}
void ASTWriter::AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record) {
Record.push_back(getIdentifierRef(II));
}
IdentID ASTWriter::getIdentifierRef(const IdentifierInfo *II) {
if (!II)
return 0;
IdentID &ID = IdentifierIDs[II];
if (ID == 0)
ID = NextIdentID++;
return ID;
}
MacroID ASTWriter::getMacroRef(MacroInfo *MI, const IdentifierInfo *Name) {
// Don't emit builtin macros like __LINE__ to the AST file unless they
// have been redefined by the header (in which case they are not
// isBuiltinMacro).
if (!MI || MI->isBuiltinMacro())
return 0;
MacroID &ID = MacroIDs[MI];
if (ID == 0) {
ID = NextMacroID++;
MacroInfoToEmitData Info = { Name, MI, ID };
MacroInfosToEmit.push_back(Info);
}
return ID;
}
MacroID ASTWriter::getMacroID(MacroInfo *MI) {
if (!MI || MI->isBuiltinMacro())
return 0;
assert(MacroIDs.find(MI) != MacroIDs.end() && "Macro not emitted!");
return MacroIDs[MI];
}
uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) {
return IdentMacroDirectivesOffsetMap.lookup(Name);
}
void ASTRecordWriter::AddSelectorRef(const Selector SelRef) {
Record->push_back(Writer->getSelectorRef(SelRef));
}
SelectorID ASTWriter::getSelectorRef(Selector Sel) {
if (Sel.getAsOpaquePtr() == nullptr) {
return 0;
}
SelectorID SID = SelectorIDs[Sel];
if (SID == 0 && Chain) {
// This might trigger a ReadSelector callback, which will set the ID for
// this selector.
Chain->LoadSelector(Sel);
SID = SelectorIDs[Sel];
}
if (SID == 0) {
SID = NextSelectorID++;
SelectorIDs[Sel] = SID;
}
return SID;
}
void ASTRecordWriter::AddCXXTemporary(const CXXTemporary *Temp) {
AddDeclRef(Temp->getDestructor());
}
void ASTRecordWriter::AddTemplateArgumentLocInfo(
TemplateArgument::ArgKind Kind, const TemplateArgumentLocInfo &Arg) {
switch (Kind) {
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Type:
AddTypeSourceInfo(Arg.getAsTypeSourceInfo());
break;
case TemplateArgument::Template:
AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
AddSourceLocation(Arg.getTemplateNameLoc());
break;
case TemplateArgument::TemplateExpansion:
AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
AddSourceLocation(Arg.getTemplateNameLoc());
AddSourceLocation(Arg.getTemplateEllipsisLoc());
break;
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::NullPtr:
case TemplateArgument::Pack:
// FIXME: Is this right?
break;
}
}
void ASTRecordWriter::AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg) {
AddTemplateArgument(Arg.getArgument());
if (Arg.getArgument().getKind() == TemplateArgument::Expression) {
bool InfoHasSameExpr
= Arg.getArgument().getAsExpr() == Arg.getLocInfo().getAsExpr();
Record->push_back(InfoHasSameExpr);
if (InfoHasSameExpr)
return; // Avoid storing the same expr twice.
}
AddTemplateArgumentLocInfo(Arg.getArgument().getKind(), Arg.getLocInfo());
}
void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) {
if (!TInfo) {
AddTypeRef(QualType());
return;
}
AddTypeLoc(TInfo->getTypeLoc());
}
void ASTRecordWriter::AddTypeLoc(TypeLoc TL) {
AddTypeRef(TL.getType());
TypeLocWriter TLW(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLW.Visit(TL);
}
void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) {
Record.push_back(GetOrCreateTypeID(T));
}
TypeID ASTWriter::GetOrCreateTypeID(QualType T) {
assert(Context);
return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx {
if (T.isNull())
return TypeIdx();
assert(!T.getLocalFastQualifiers());
TypeIdx &Idx = TypeIdxs[T];
if (Idx.getIndex() == 0) {
if (DoneWritingDeclsAndTypes) {
assert(0 && "New type seen after serializing all the types to emit!");
return TypeIdx();
}
// We haven't seen this type before. Assign it a new ID and put it
// into the queue of types to emit.
Idx = TypeIdx(NextTypeID++);
DeclTypesToEmit.push(T);
}
return Idx;
});
}
TypeID ASTWriter::getTypeID(QualType T) const {
assert(Context);
return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx {
if (T.isNull())
return TypeIdx();
assert(!T.getLocalFastQualifiers());
TypeIdxMap::const_iterator I = TypeIdxs.find(T);
assert(I != TypeIdxs.end() && "Type not emitted!");
return I->second;
});
}
void ASTWriter::AddDeclRef(const Decl *D, RecordDataImpl &Record) {
Record.push_back(GetDeclRef(D));
}
DeclID ASTWriter::GetDeclRef(const Decl *D) {
assert(WritingAST && "Cannot request a declaration ID before AST writing");
if (!D) {
return 0;
}
// If D comes from an AST file, its declaration ID is already known and
// fixed.
if (D->isFromASTFile())
return D->getGlobalID();
assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
DeclID &ID = DeclIDs[D];
if (ID == 0) {
if (DoneWritingDeclsAndTypes) {
assert(0 && "New decl seen after serializing all the decls to emit!");
return 0;
}
// We haven't seen this declaration before. Give it a new ID and
// enqueue it in the list of declarations to emit.
ID = NextDeclID++;
DeclTypesToEmit.push(const_cast<Decl *>(D));
}
return ID;
}
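// The assign-on-first-use idiom above (also used by getIdentifierRef,
// getMacroRef, and getSelectorRef) relies on DenseMap::operator[]
// value-initializing missing entries to 0, so one lookup both tests for
// and creates the slot. A generic sketch of the pattern:
template <typename KeyT>
static unsigned getOrAssignID(llvm::DenseMap<KeyT, unsigned> &Map, KeyT Key,
                              unsigned &NextID) {
  unsigned &ID = Map[Key]; // inserts 0 if Key was absent
  if (ID == 0)
    ID = NextID++; // IDs start at 1; 0 stays reserved for "null"
  return ID;
}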
DeclID ASTWriter::getDeclID(const Decl *D) {
if (!D)
return 0;
// If D comes from an AST file, its declaration ID is already known and
// fixed.
if (D->isFromASTFile())
return D->getGlobalID();
assert(DeclIDs.find(D) != DeclIDs.end() && "Declaration not emitted!");
return DeclIDs[D];
}
void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) {
assert(ID);
assert(D);
SourceLocation Loc = D->getLocation();
if (Loc.isInvalid())
return;
// We only keep track of the file-level declarations of each file.
if (!D->getLexicalDeclContext()->isFileContext())
return;
// FIXME: ParmVarDecls that are part of a function type of a parameter of
// a function/objc method, should not have TU as lexical context.
if (isa<ParmVarDecl>(D))
return;
SourceManager &SM = Context->getSourceManager();
SourceLocation FileLoc = SM.getFileLoc(Loc);
assert(SM.isLocalSourceLocation(FileLoc));
FileID FID;
unsigned Offset;
std::tie(FID, Offset) = SM.getDecomposedLoc(FileLoc);
if (FID.isInvalid())
return;
assert(SM.getSLocEntry(FID).isFile());
DeclIDInFileInfo *&Info = FileDeclIDs[FID];
if (!Info)
Info = new DeclIDInFileInfo();
std::pair<unsigned, serialization::DeclID> LocDecl(Offset, ID);
LocDeclIDsTy &Decls = Info->DeclIDs;
if (Decls.empty() || Decls.back().first <= Offset) {
Decls.push_back(LocDecl);
return;
}
LocDeclIDsTy::iterator I =
std::upper_bound(Decls.begin(), Decls.end(), LocDecl, llvm::less_first());
Decls.insert(I, LocDecl);
}
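// A minimal standalone sketch of the insertion strategy above
// (hypothetical types): declarations are usually visited in source
// order, so the O(1) append is the common path and the binary-search
// insert only runs for out-of-order locations.
static void insertSorted(std::vector<std::pair<unsigned, unsigned>> &Decls,
                         std::pair<unsigned, unsigned> LocDecl) {
  if (Decls.empty() || Decls.back().first <= LocDecl.first) {
    Decls.push_back(LocDecl); // common case: already in order
    return;
  }
  Decls.insert(std::upper_bound(Decls.begin(), Decls.end(), LocDecl,
                                llvm::less_first()),
               LocDecl);
}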
void ASTRecordWriter::AddDeclarationName(DeclarationName Name) {
// FIXME: Emit a stable enum for NameKind. 0 = Identifier etc.
Record->push_back(Name.getNameKind());
switch (Name.getNameKind()) {
case DeclarationName::Identifier:
AddIdentifierRef(Name.getAsIdentifierInfo());
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
AddSelectorRef(Name.getObjCSelector());
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
AddTypeRef(Name.getCXXNameType());
break;
case DeclarationName::CXXDeductionGuideName:
AddDeclRef(Name.getCXXDeductionGuideTemplate());
break;
case DeclarationName::CXXOperatorName:
Record->push_back(Name.getCXXOverloadedOperator());
break;
case DeclarationName::CXXLiteralOperatorName:
AddIdentifierRef(Name.getCXXLiteralIdentifier());
break;
case DeclarationName::CXXUsingDirective:
// No extra data to emit
break;
}
}
unsigned ASTWriter::getAnonymousDeclarationNumber(const NamedDecl *D) {
assert(needsAnonymousDeclarationNumber(D) &&
"expected an anonymous declaration");
// Number the anonymous declarations within this context, if we've not
// already done so.
auto It = AnonymousDeclarationNumbers.find(D);
if (It == AnonymousDeclarationNumbers.end()) {
auto *DC = D->getLexicalDeclContext();
numberAnonymousDeclsWithin(DC, [&](const NamedDecl *ND, unsigned Number) {
AnonymousDeclarationNumbers[ND] = Number;
});
It = AnonymousDeclarationNumbers.find(D);
assert(It != AnonymousDeclarationNumbers.end() &&
"declaration not found within its lexical context");
}
return It->second;
}
void ASTRecordWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
AddTypeSourceInfo(DNLoc.NamedType.TInfo);
break;
case DeclarationName::CXXOperatorName:
AddSourceLocation(SourceLocation::getFromRawEncoding(
DNLoc.CXXOperatorName.BeginOpNameLoc));
AddSourceLocation(
SourceLocation::getFromRawEncoding(DNLoc.CXXOperatorName.EndOpNameLoc));
break;
case DeclarationName::CXXLiteralOperatorName:
AddSourceLocation(SourceLocation::getFromRawEncoding(
DNLoc.CXXLiteralOperatorName.OpNameLoc));
break;
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
break;
}
}
void ASTRecordWriter::AddDeclarationNameInfo(
const DeclarationNameInfo &NameInfo) {
AddDeclarationName(NameInfo.getName());
AddSourceLocation(NameInfo.getLoc());
AddDeclarationNameLoc(NameInfo.getInfo(), NameInfo.getName());
}
void ASTRecordWriter::AddQualifierInfo(const QualifierInfo &Info) {
AddNestedNameSpecifierLoc(Info.QualifierLoc);
Record->push_back(Info.NumTemplParamLists);
for (unsigned i = 0, e = Info.NumTemplParamLists; i != e; ++i)
AddTemplateParameterList(Info.TemplParamLists[i]);
}
void ASTRecordWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// typically accommodates the vast majority.
SmallVector<NestedNameSpecifier *, 8> NestedNames;
// Push each of the NNS's onto a stack for serialization in reverse order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS->getPrefix();
}
Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind = NNS->getKind();
Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
AddIdentifierRef(NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Namespace:
AddDeclRef(NNS->getAsNamespace());
break;
case NestedNameSpecifier::NamespaceAlias:
AddDeclRef(NNS->getAsNamespaceAlias());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
AddTypeRef(QualType(NNS->getAsType(), 0));
Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
break;
case NestedNameSpecifier::Global:
// Don't need to write an associated value.
break;
case NestedNameSpecifier::Super:
AddDeclRef(NNS->getAsRecordDecl());
break;
}
}
}
void ASTRecordWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// typically accommodates the vast majority.
SmallVector<NestedNameSpecifierLoc , 8> NestedNames;
// Push each of the nested-name-specifiers onto a stack for
// serialization in reverse order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS.getPrefix();
}
Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind
= NNS.getNestedNameSpecifier()->getKind();
Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
AddIdentifierRef(NNS.getNestedNameSpecifier()->getAsIdentifier());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::Namespace:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespace());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::NamespaceAlias:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespaceAlias());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
AddTypeLoc(NNS.getTypeLoc());
AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Global:
AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Super:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsRecordDecl());
AddSourceRange(NNS.getLocalSourceRange());
break;
}
}
}
void ASTRecordWriter::AddTemplateName(TemplateName Name) {
TemplateName::NameKind Kind = Name.getKind();
Record->push_back(Kind);
switch (Kind) {
case TemplateName::Template:
AddDeclRef(Name.getAsTemplateDecl());
break;
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *OvT = Name.getAsOverloadedTemplate();
Record->push_back(OvT->size());
for (const auto &I : *OvT)
AddDeclRef(I);
break;
}
case TemplateName::QualifiedTemplate: {
QualifiedTemplateName *QualT = Name.getAsQualifiedTemplateName();
AddNestedNameSpecifier(QualT->getQualifier());
Record->push_back(QualT->hasTemplateKeyword());
AddDeclRef(QualT->getTemplateDecl());
break;
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DepT = Name.getAsDependentTemplateName();
AddNestedNameSpecifier(DepT->getQualifier());
Record->push_back(DepT->isIdentifier());
if (DepT->isIdentifier())
AddIdentifierRef(DepT->getIdentifier());
else
Record->push_back(DepT->getOperator());
break;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
AddDeclRef(subst->getParameter());
AddTemplateName(subst->getReplacement());
break;
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *SubstPack
= Name.getAsSubstTemplateTemplateParmPack();
AddDeclRef(SubstPack->getParameterPack());
AddTemplateArgument(SubstPack->getArgumentPack());
break;
}
}
}
void ASTRecordWriter::AddTemplateArgument(const TemplateArgument &Arg) {
Record->push_back(Arg.getKind());
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
AddTypeRef(Arg.getAsType());
break;
case TemplateArgument::Declaration:
AddDeclRef(Arg.getAsDecl());
AddTypeRef(Arg.getParamTypeForDecl());
break;
case TemplateArgument::NullPtr:
AddTypeRef(Arg.getNullPtrType());
break;
case TemplateArgument::Integral:
AddAPSInt(Arg.getAsIntegral());
AddTypeRef(Arg.getIntegralType());
break;
case TemplateArgument::Template:
AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
break;
case TemplateArgument::TemplateExpansion:
AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
if (Optional<unsigned> NumExpansions = Arg.getNumTemplateExpansions())
Record->push_back(*NumExpansions + 1);
else
Record->push_back(0);
break;
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Pack:
Record->push_back(Arg.pack_size());
for (const auto &P : Arg.pack_elements())
AddTemplateArgument(P);
break;
}
}
void ASTRecordWriter::AddTemplateParameterList(
const TemplateParameterList *TemplateParams) {
assert(TemplateParams && "No TemplateParams!");
AddSourceLocation(TemplateParams->getTemplateLoc());
AddSourceLocation(TemplateParams->getLAngleLoc());
AddSourceLocation(TemplateParams->getRAngleLoc());
// TODO: Concepts
Record->push_back(TemplateParams->size());
for (const auto &P : *TemplateParams)
AddDeclRef(P);
}
/// \brief Emit a template argument list.
void ASTRecordWriter::AddTemplateArgumentList(
const TemplateArgumentList *TemplateArgs) {
assert(TemplateArgs && "No TemplateArgs!");
Record->push_back(TemplateArgs->size());
for (int i = 0, e = TemplateArgs->size(); i != e; ++i)
AddTemplateArgument(TemplateArgs->get(i));
}
void ASTRecordWriter::AddASTTemplateArgumentListInfo(
const ASTTemplateArgumentListInfo *ASTTemplArgList) {
assert(ASTTemplArgList && "No ASTTemplArgList!");
AddSourceLocation(ASTTemplArgList->LAngleLoc);
AddSourceLocation(ASTTemplArgList->RAngleLoc);
Record->push_back(ASTTemplArgList->NumTemplateArgs);
const TemplateArgumentLoc *TemplArgs = ASTTemplArgList->getTemplateArgs();
for (int i = 0, e = ASTTemplArgList->NumTemplateArgs; i != e; ++i)
AddTemplateArgumentLoc(TemplArgs[i]);
}
void ASTRecordWriter::AddUnresolvedSet(const ASTUnresolvedSet &Set) {
Record->push_back(Set.size());
for (ASTUnresolvedSet::const_iterator
I = Set.begin(), E = Set.end(); I != E; ++I) {
AddDeclRef(I.getDecl());
Record->push_back(I.getAccess());
}
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) {
Record->push_back(Base.isVirtual());
Record->push_back(Base.isBaseOfClass());
Record->push_back(Base.getAccessSpecifierAsWritten());
Record->push_back(Base.getInheritConstructors());
AddTypeSourceInfo(Base.getTypeSourceInfo());
AddSourceRange(Base.getSourceRange());
AddSourceLocation(Base.isPackExpansion()? Base.getEllipsisLoc()
: SourceLocation());
}
static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W,
ArrayRef<CXXBaseSpecifier> Bases) {
ASTWriter::RecordData Record;
ASTRecordWriter Writer(W, Record);
Writer.push_back(Bases.size());
for (auto &Base : Bases)
Writer.AddCXXBaseSpecifier(Base);
return Writer.Emit(serialization::DECL_CXX_BASE_SPECIFIERS);
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef<CXXBaseSpecifier> Bases) {
AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases));
}
static uint64_t
EmitCXXCtorInitializers(ASTWriter &W,
ArrayRef<CXXCtorInitializer *> CtorInits) {
ASTWriter::RecordData Record;
ASTRecordWriter Writer(W, Record);
Writer.push_back(CtorInits.size());
for (auto *Init : CtorInits) {
if (Init->isBaseInitializer()) {
Writer.push_back(CTOR_INITIALIZER_BASE);
Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
Writer.push_back(Init->isBaseVirtual());
} else if (Init->isDelegatingInitializer()) {
Writer.push_back(CTOR_INITIALIZER_DELEGATING);
Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
} else if (Init->isMemberInitializer()){
Writer.push_back(CTOR_INITIALIZER_MEMBER);
Writer.AddDeclRef(Init->getMember());
} else {
Writer.push_back(CTOR_INITIALIZER_INDIRECT_MEMBER);
Writer.AddDeclRef(Init->getIndirectMember());
}
Writer.AddSourceLocation(Init->getMemberLocation());
Writer.AddStmt(Init->getInit());
Writer.AddSourceLocation(Init->getLParenLoc());
Writer.AddSourceLocation(Init->getRParenLoc());
Writer.push_back(Init->isWritten());
if (Init->isWritten())
Writer.push_back(Init->getSourceOrder());
}
return Writer.Emit(serialization::DECL_CXX_CTOR_INITIALIZERS);
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXCtorInitializers(
ArrayRef<CXXCtorInitializer *> CtorInits) {
AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits));
}
void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
auto &Data = D->data();
Record->push_back(Data.IsLambda);
Record->push_back(Data.UserDeclaredConstructor);
Record->push_back(Data.UserDeclaredSpecialMembers);
Record->push_back(Data.Aggregate);
Record->push_back(Data.PlainOldData);
Record->push_back(Data.Empty);
Record->push_back(Data.Polymorphic);
Record->push_back(Data.Abstract);
Record->push_back(Data.IsStandardLayout);
Record->push_back(Data.HasNoNonEmptyBases);
Record->push_back(Data.HasPrivateFields);
Record->push_back(Data.HasProtectedFields);
Record->push_back(Data.HasPublicFields);
Record->push_back(Data.HasMutableFields);
Record->push_back(Data.HasVariantMembers);
Record->push_back(Data.HasOnlyCMembers);
Record->push_back(Data.HasInClassInitializer);
Record->push_back(Data.HasUninitializedReferenceMember);
Record->push_back(Data.HasUninitializedFields);
Record->push_back(Data.HasInheritedConstructor);
Record->push_back(Data.HasInheritedAssignment);
+ Record->push_back(Data.NeedOverloadResolutionForCopyConstructor);
Record->push_back(Data.NeedOverloadResolutionForMoveConstructor);
Record->push_back(Data.NeedOverloadResolutionForMoveAssignment);
Record->push_back(Data.NeedOverloadResolutionForDestructor);
+ Record->push_back(Data.DefaultedCopyConstructorIsDeleted);
Record->push_back(Data.DefaultedMoveConstructorIsDeleted);
Record->push_back(Data.DefaultedMoveAssignmentIsDeleted);
Record->push_back(Data.DefaultedDestructorIsDeleted);
Record->push_back(Data.HasTrivialSpecialMembers);
Record->push_back(Data.DeclaredNonTrivialSpecialMembers);
Record->push_back(Data.HasIrrelevantDestructor);
Record->push_back(Data.HasConstexprNonCopyMoveConstructor);
Record->push_back(Data.HasDefaultedDefaultConstructor);
+ Record->push_back(Data.CanPassInRegisters);
Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr);
Record->push_back(Data.HasConstexprDefaultConstructor);
Record->push_back(Data.HasNonLiteralTypeFieldsOrBases);
Record->push_back(Data.ComputedVisibleConversions);
Record->push_back(Data.UserProvidedDefaultConstructor);
Record->push_back(Data.DeclaredSpecialMembers);
Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForVBase);
Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase);
Record->push_back(Data.ImplicitCopyAssignmentHasConstParam);
Record->push_back(Data.HasDeclaredCopyConstructorWithConstParam);
Record->push_back(Data.HasDeclaredCopyAssignmentWithConstParam);
// getODRHash will compute the ODRHash if it has not been previously computed.
Record->push_back(D->getODRHash());
bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo &&
Writer->WritingModule && !D->isDependentType();
Record->push_back(ModulesDebugInfo);
if (ModulesDebugInfo)
Writer->ModularCodegenDecls.push_back(Writer->GetDeclRef(D));
// IsLambda bit is already saved.
Record->push_back(Data.NumBases);
if (Data.NumBases > 0)
AddCXXBaseSpecifiers(Data.bases());
// FIXME: Make VBases lazily computed when needed to avoid storing them.
Record->push_back(Data.NumVBases);
if (Data.NumVBases > 0)
AddCXXBaseSpecifiers(Data.vbases());
AddUnresolvedSet(Data.Conversions.get(*Writer->Context));
AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context));
// Data.Definition is the owning decl, no need to write it.
AddDeclRef(D->getFirstFriend());
// Add lambda-specific data.
if (Data.IsLambda) {
auto &Lambda = D->getLambdaData();
Record->push_back(Lambda.Dependent);
Record->push_back(Lambda.IsGenericLambda);
Record->push_back(Lambda.CaptureDefault);
Record->push_back(Lambda.NumCaptures);
Record->push_back(Lambda.NumExplicitCaptures);
Record->push_back(Lambda.ManglingNumber);
AddDeclRef(D->getLambdaContextDecl());
AddTypeSourceInfo(Lambda.MethodTyInfo);
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
const LambdaCapture &Capture = Lambda.Captures[I];
AddSourceLocation(Capture.getLocation());
Record->push_back(Capture.isImplicit());
Record->push_back(Capture.getCaptureKind());
switch (Capture.getCaptureKind()) {
case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
break;
case LCK_ByCopy:
case LCK_ByRef:
VarDecl *Var =
Capture.capturesVariable() ? Capture.getCapturedVar() : nullptr;
AddDeclRef(Var);
AddSourceLocation(Capture.isPackExpansion() ? Capture.getEllipsisLoc()
: SourceLocation());
break;
}
}
}
}
void ASTWriter::ReaderInitialized(ASTReader *Reader) {
assert(Reader && "Cannot remove chain");
assert((!Chain || Chain == Reader) && "Cannot replace chain");
assert(FirstDeclID == NextDeclID &&
FirstTypeID == NextTypeID &&
FirstIdentID == NextIdentID &&
FirstMacroID == NextMacroID &&
FirstSubmoduleID == NextSubmoduleID &&
FirstSelectorID == NextSelectorID &&
"Setting chain after writing has started.");
Chain = Reader;
// Note, this will get called multiple times, once when the reader starts up
// and again each time it's done reading a PCH or module.
FirstDeclID = NUM_PREDEF_DECL_IDS + Chain->getTotalNumDecls();
FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes();
FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers();
FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros();
FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules();
FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors();
NextDeclID = FirstDeclID;
NextTypeID = FirstTypeID;
NextIdentID = FirstIdentID;
NextMacroID = FirstMacroID;
NextSelectorID = FirstSelectorID;
NextSubmoduleID = FirstSubmoduleID;
}
void ASTWriter::IdentifierRead(IdentID ID, IdentifierInfo *II) {
// Always keep the highest ID. See \p TypeRead() for more information.
IdentID &StoredID = IdentifierIDs[II];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) {
// Always keep the highest ID. See \p TypeRead() for more information.
MacroID &StoredID = MacroIDs[MI];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::TypeRead(TypeIdx Idx, QualType T) {
// Always take the highest-numbered type index. This copes with an interesting
// case for chained AST writing where we schedule writing the type and then,
// later, deserialize the type from another AST. In this case, we want to
// keep the higher-numbered entry so that we can properly write it out to
// the AST file.
TypeIdx &StoredIdx = TypeIdxs[T];
if (Idx.getIndex() >= StoredIdx.getIndex())
StoredIdx = Idx;
}
void ASTWriter::SelectorRead(SelectorID ID, Selector S) {
// Always keep the highest ID. See \p TypeRead() for more information.
SelectorID &StoredID = SelectorIDs[S];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::MacroDefinitionRead(serialization::PreprocessedEntityID ID,
MacroDefinitionRecord *MD) {
assert(MacroDefinitions.find(MD) == MacroDefinitions.end());
MacroDefinitions[MD] = ID;
}
void ASTWriter::ModuleRead(serialization::SubmoduleID ID, Module *Mod) {
assert(SubmoduleIDs.find(Mod) == SubmoduleIDs.end());
SubmoduleIDs[Mod] = ID;
}
void ASTWriter::CompletedTagDefinition(const TagDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isCompleteDefinition());
assert(!WritingAST && "Already writing the AST!");
if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
// We are interested when a PCH decl is modified.
if (RD->isFromASTFile()) {
// A forward reference was mutated into a definition. Rewrite it.
// FIXME: This happens during template instantiation; should we
// have created a new definition decl instead?
assert(isTemplateInstantiation(RD->getTemplateSpecializationKind()) &&
"completed a tag from another module but not by instantiation?");
DeclUpdates[RD].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_CLASS_DEFINITION));
}
}
}
static bool isImportedDeclContext(ASTReader *Chain, const Decl *D) {
if (D->isFromASTFile())
return true;
// The predefined __va_list_tag struct is imported if we imported any decls.
// FIXME: This is a gross hack.
return D == D->getASTContext().getVaListTagDecl();
}
void ASTWriter::AddedVisibleDecl(const DeclContext *DC, const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(DC->isLookupContext() &&
"Should not add lookup results to non-lookup contexts!");
// TU is handled elsewhere.
if (isa<TranslationUnitDecl>(DC))
return;
// Namespaces are handled elsewhere, except for template instantiations of
// FunctionTemplateDecls in namespaces. We are interested in cases where the
// local instantiations are added to an imported context. This only happens
// when adding ADL lookup candidates, for example templated friends.
if (isa<NamespaceDecl>(DC) && D->getFriendObjectKind() == Decl::FOK_None &&
!isa<FunctionTemplateDecl>(D))
return;
// We're only interested in cases where a local declaration is added to an
// imported context.
if (D->isFromASTFile() || !isImportedDeclContext(Chain, cast<Decl>(DC)))
return;
assert(DC == DC->getPrimaryContext() && "added to non-primary context");
assert(!getDefinitiveDeclContext(DC) && "DeclContext not definitive!");
assert(!WritingAST && "Already writing the AST!");
if (UpdatedDeclContexts.insert(DC) && !cast<Decl>(DC)->isFromASTFile()) {
// We're adding a visible declaration to a predefined decl context. Ensure
// that we write out all of its lookup results so we don't get a nasty
// surprise when we try to emit its lookup table.
for (auto *Child : DC->decls())
DeclsToEmitEvenIfUnreferenced.push_back(Child);
}
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isImplicit());
// We're only interested in cases where a local declaration is added to an
// imported context.
if (D->isFromASTFile() || !isImportedDeclContext(Chain, RD))
return;
if (!isa<CXXMethodDecl>(D))
return;
// A decl coming from PCH was modified.
assert(RD->isCompleteDefinition());
assert(!WritingAST && "Already writing the AST!");
DeclUpdates[RD].push_back(DeclUpdate(UPD_CXX_ADDED_IMPLICIT_MEMBER, D));
}
void ASTWriter::ResolvedExceptionSpec(const FunctionDecl *FD) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!DoneWritingDeclsAndTypes && "Already done writing updates!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
// If we don't already know the exception specification for this redecl
// chain, add an update record for it.
if (isUnresolvedExceptionSpec(cast<FunctionDecl>(D)
->getType()
->castAs<FunctionProtoType>()
->getExceptionSpecType()))
DeclUpdates[D].push_back(UPD_CXX_RESOLVED_EXCEPTION_SPEC);
});
}
void ASTWriter::DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_DEDUCED_RETURN_TYPE, ReturnType));
});
}
void ASTWriter::ResolvedOperatorDelete(const CXXDestructorDecl *DD,
const FunctionDecl *Delete) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(Delete && "Not given an operator delete");
if (!Chain) return;
Chain->forEachImportedKeyDecl(DD, [&](const Decl *D) {
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_RESOLVED_DTOR_DELETE, Delete));
});
}
void ASTWriter::CompletedImplicitDefinition(const FunctionDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return; // Declaration not imported from PCH.
// Implicit function decl from a PCH was defined.
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
}
void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
}
void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
// Since the actual instantiation is delayed, this really means that we need
// to update the instantiation location.
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER,
D->getMemberSpecializationInfo()->getPointOfInstantiation()));
}
void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, D));
}
void ASTWriter::DefaultMemberInitializerInstantiated(const FieldDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER, D));
}
void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
const ObjCInterfaceDecl *IFD) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!IFD->isFromASTFile())
return; // Declaration not imported from PCH.
assert(IFD->getDefinition() && "Category on a class without a definition?");
ObjCClassesWithCategories.insert(
const_cast<ObjCInterfaceDecl *>(IFD->getDefinition()));
}
void ASTWriter::DeclarationMarkedUsed(const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
// If there is *any* declaration of the entity that's not from an AST file,
// we can skip writing the update record. We make sure that isUsed() triggers
// completion of the redeclaration chain of the entity.
for (auto Prev = D->getMostRecentDecl(); Prev; Prev = Prev->getPreviousDecl())
if (IsLocalDecl(Prev))
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_USED));
}
void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE));
}
void ASTWriter::DeclarationMarkedOpenMPDeclareTarget(const Decl *D,
const Attr *Attr) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_DECL_MARKED_OPENMP_DECLARETARGET, Attr));
}
void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(D->isHidden() && "expected a hidden declaration");
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
}
void ASTWriter::AddedAttributeToRecord(const Attr *Attr,
const RecordDecl *Record) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Record->isFromASTFile())
return;
DeclUpdates[Record].push_back(DeclUpdate(UPD_ADDED_ATTR_TO_RECORD, Attr));
}
void ASTWriter::AddedCXXTemplateSpecialization(
const ClassTemplateDecl *TD, const ClassTemplateSpecializationDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXTemplateSpecialization(
const VarTemplateDecl *TD, const VarTemplateSpecializationDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXTemplateSpecialization(const FunctionTemplateDecl *TD,
const FunctionDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
Index: head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp (revision 322855)
@@ -1,2482 +1,2495 @@
//== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a basic region store model. In this model we do have
// field sensitivity, but we assume nothing about the heap shape, so
// recursive data structures are largely ignored. Basically we do 1-limiting
// analysis. Parameter pointers are assumed not to alias; pointee objects of
// parameters are created lazily.
//
//===----------------------------------------------------------------------===//
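// An illustrative sketch (editorial, not from the original source) of what
// field sensitivity with 1-limiting means in practice:
//
//   struct Node { int val; struct Node *next; };
//   void touch(struct Node *n) {
//     n->val = 1;       // tracked: a distinct binding for n's 'val' field
//     n->next->val = 2; // the pointees of 'n' and 'n->next' are created
//   }                   // lazily; deeper recursion is not modeled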
#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/AnalysisContext.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace clang;
using namespace ento;
//===----------------------------------------------------------------------===//
// Representation of binding keys.
//===----------------------------------------------------------------------===//
namespace {
class BindingKey {
public:
enum Kind { Default = 0x0, Direct = 0x1 };
private:
enum { Symbolic = 0x2 };
llvm::PointerIntPair<const MemRegion *, 2> P;
uint64_t Data;
/// Create a key for a binding to region \p r, which has a symbolic offset
/// from region \p Base.
explicit BindingKey(const SubRegion *r, const SubRegion *Base, Kind k)
: P(r, k | Symbolic), Data(reinterpret_cast<uintptr_t>(Base)) {
assert(r && Base && "Must have known regions.");
assert(getConcreteOffsetRegion() == Base && "Failed to store base region");
}
/// Create a key for a binding at \p offset from base region \p r.
explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k)
: P(r, k), Data(offset) {
assert(r && "Must have known regions.");
assert(getOffset() == offset && "Failed to store offset");
assert((r == r->getBaseRegion() || isa<ObjCIvarRegion>(r)) && "Not a base");
}
public:
bool isDirect() const { return P.getInt() & Direct; }
bool hasSymbolicOffset() const { return P.getInt() & Symbolic; }
const MemRegion *getRegion() const { return P.getPointer(); }
uint64_t getOffset() const {
assert(!hasSymbolicOffset());
return Data;
}
const SubRegion *getConcreteOffsetRegion() const {
assert(hasSymbolicOffset());
return reinterpret_cast<const SubRegion *>(static_cast<uintptr_t>(Data));
}
const MemRegion *getBaseRegion() const {
if (hasSymbolicOffset())
return getConcreteOffsetRegion()->getBaseRegion();
return getRegion()->getBaseRegion();
}
void Profile(llvm::FoldingSetNodeID& ID) const {
ID.AddPointer(P.getOpaqueValue());
ID.AddInteger(Data);
}
static BindingKey Make(const MemRegion *R, Kind k);
bool operator<(const BindingKey &X) const {
if (P.getOpaqueValue() < X.P.getOpaqueValue())
return true;
if (P.getOpaqueValue() > X.P.getOpaqueValue())
return false;
return Data < X.Data;
}
bool operator==(const BindingKey &X) const {
return P.getOpaqueValue() == X.P.getOpaqueValue() &&
Data == X.Data;
}
void dump() const;
};
} // end anonymous namespace
BindingKey BindingKey::Make(const MemRegion *R, Kind k) {
const RegionOffset &RO = R->getAsOffset();
if (RO.hasSymbolicOffset())
return BindingKey(cast<SubRegion>(R), cast<SubRegion>(RO.getRegion()), k);
return BindingKey(RO.getRegion(), RO.getOffset(), k);
}
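// A hedged illustration of the two key shapes produced above (informal
// notation; a 32-bit 'int' is assumed):
//
//   struct S { int a; int b; } s;
//   s.b = 0;       // concrete key: (base region of 's', bit offset 32, Direct)
//
//   S *p; int i;   // 'i' is symbolic
//   p[i].b = 0;    // symbolic key: keeps the region 'p[i].b' itself plus the
//                  // nearest concrete-offset region as its base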
namespace llvm {
static inline
raw_ostream &operator<<(raw_ostream &os, BindingKey K) {
os << '(' << K.getRegion();
if (!K.hasSymbolicOffset())
os << ',' << K.getOffset();
os << ',' << (K.isDirect() ? "direct" : "default")
<< ')';
return os;
}
template <typename T> struct isPodLike;
template <> struct isPodLike<BindingKey> {
static const bool value = true;
};
} // end llvm namespace
LLVM_DUMP_METHOD void BindingKey::dump() const { llvm::errs() << *this; }
//===----------------------------------------------------------------------===//
// Actual Store type.
//===----------------------------------------------------------------------===//
typedef llvm::ImmutableMap<BindingKey, SVal> ClusterBindings;
typedef llvm::ImmutableMapRef<BindingKey, SVal> ClusterBindingsRef;
typedef std::pair<BindingKey, SVal> BindingPair;
typedef llvm::ImmutableMap<const MemRegion *, ClusterBindings>
RegionBindings;
namespace {
class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,
ClusterBindings> {
ClusterBindings::Factory *CBFactory;
public:
typedef llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>
ParentTy;
RegionBindingsRef(ClusterBindings::Factory &CBFactory,
const RegionBindings::TreeTy *T,
RegionBindings::TreeTy::Factory *F)
: llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(T, F),
CBFactory(&CBFactory) {}
RegionBindingsRef(const ParentTy &P, ClusterBindings::Factory &CBFactory)
: llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(P),
CBFactory(&CBFactory) {}
RegionBindingsRef add(key_type_ref K, data_type_ref D) const {
return RegionBindingsRef(static_cast<const ParentTy *>(this)->add(K, D),
*CBFactory);
}
RegionBindingsRef remove(key_type_ref K) const {
return RegionBindingsRef(static_cast<const ParentTy *>(this)->remove(K),
*CBFactory);
}
RegionBindingsRef addBinding(BindingKey K, SVal V) const;
RegionBindingsRef addBinding(const MemRegion *R,
BindingKey::Kind k, SVal V) const;
const SVal *lookup(BindingKey K) const;
const SVal *lookup(const MemRegion *R, BindingKey::Kind k) const;
using llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>::lookup;
RegionBindingsRef removeBinding(BindingKey K);
RegionBindingsRef removeBinding(const MemRegion *R,
BindingKey::Kind k);
RegionBindingsRef removeBinding(const MemRegion *R) {
return removeBinding(R, BindingKey::Direct).
removeBinding(R, BindingKey::Default);
}
Optional<SVal> getDirectBinding(const MemRegion *R) const;
/// getDefaultBinding - Returns an SVal* representing an optional default
/// binding associated with a region and its subregions.
Optional<SVal> getDefaultBinding(const MemRegion *R) const;
/// Return the internal tree as a Store.
Store asStore() const {
return asImmutableMap().getRootWithoutRetain();
}
void dump(raw_ostream &OS, const char *nl) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
OS << ' ' << CI.getKey() << " : " << CI.getData() << nl;
}
OS << nl;
}
}
LLVM_DUMP_METHOD void dump() const { dump(llvm::errs(), "\n"); }
};
} // end anonymous namespace
typedef const RegionBindingsRef& RegionBindingsConstRef;
Optional<SVal> RegionBindingsRef::getDirectBinding(const MemRegion *R) const {
return Optional<SVal>::create(lookup(R, BindingKey::Direct));
}
Optional<SVal> RegionBindingsRef::getDefaultBinding(const MemRegion *R) const {
if (R->isBoundable())
if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R))
if (TR->getValueType()->isUnionType())
return UnknownVal();
return Optional<SVal>::create(lookup(R, BindingKey::Default));
}
RegionBindingsRef RegionBindingsRef::addBinding(BindingKey K, SVal V) const {
const MemRegion *Base = K.getBaseRegion();
const ClusterBindings *ExistingCluster = lookup(Base);
ClusterBindings Cluster =
(ExistingCluster ? *ExistingCluster : CBFactory->getEmptyMap());
ClusterBindings NewCluster = CBFactory->add(Cluster, K, V);
return add(Base, NewCluster);
}
RegionBindingsRef RegionBindingsRef::addBinding(const MemRegion *R,
BindingKey::Kind k,
SVal V) const {
return addBinding(BindingKey::Make(R, k), V);
}
const SVal *RegionBindingsRef::lookup(BindingKey K) const {
const ClusterBindings *Cluster = lookup(K.getBaseRegion());
if (!Cluster)
return nullptr;
return Cluster->lookup(K);
}
const SVal *RegionBindingsRef::lookup(const MemRegion *R,
BindingKey::Kind k) const {
return lookup(BindingKey::Make(R, k));
}
RegionBindingsRef RegionBindingsRef::removeBinding(BindingKey K) {
const MemRegion *Base = K.getBaseRegion();
const ClusterBindings *Cluster = lookup(Base);
if (!Cluster)
return *this;
ClusterBindings NewCluster = CBFactory->remove(*Cluster, K);
if (NewCluster.isEmpty())
return remove(Base);
return add(Base, NewCluster);
}
RegionBindingsRef RegionBindingsRef::removeBinding(const MemRegion *R,
BindingKey::Kind k){
return removeBinding(BindingKey::Make(R, k));
}
//===----------------------------------------------------------------------===//
// Fine-grained control of RegionStoreManager.
//===----------------------------------------------------------------------===//
namespace {
struct minimal_features_tag {};
struct maximal_features_tag {};
class RegionStoreFeatures {
bool SupportsFields;
public:
RegionStoreFeatures(minimal_features_tag) :
SupportsFields(false) {}
RegionStoreFeatures(maximal_features_tag) :
SupportsFields(true) {}
void enableFields(bool t) { SupportsFields = t; }
bool supportsFields() const { return SupportsFields; }
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Main RegionStore logic.
//===----------------------------------------------------------------------===//
namespace {
class invalidateRegionsWorker;
class RegionStoreManager : public StoreManager {
public:
const RegionStoreFeatures Features;
RegionBindings::Factory RBFactory;
mutable ClusterBindings::Factory CBFactory;
typedef std::vector<SVal> SValListTy;
private:
typedef llvm::DenseMap<const LazyCompoundValData *,
SValListTy> LazyBindingsMapTy;
LazyBindingsMapTy LazyBindingsMap;
/// The largest number of fields a struct can have and still be
/// considered "small".
///
/// This is currently used to decide whether or not it is worth "forcing" a
/// LazyCompoundVal on bind.
///
/// This is controlled by the 'region-store-small-struct-limit' option.
/// To disable all small-struct-dependent behavior, set the option to "0".
unsigned SmallStructLimit;
/// \brief A helper used to populate the work list with the given set of
/// regions.
void populateWorkList(invalidateRegionsWorker &W,
ArrayRef<SVal> Values,
InvalidatedRegions *TopLevelRegions);
public:
RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f)
: StoreManager(mgr), Features(f),
RBFactory(mgr.getAllocator()), CBFactory(mgr.getAllocator()),
SmallStructLimit(0) {
if (SubEngine *Eng = StateMgr.getOwningEngine()) {
AnalyzerOptions &Options = Eng->getAnalysisManager().options;
SmallStructLimit =
Options.getOptionAsInteger("region-store-small-struct-limit", 2);
}
}
/// setImplicitDefaultValue - Set the default binding for the provided
/// MemRegion to the value implicitly defined for compound literals when
/// the value is not specified.
RegionBindingsRef setImplicitDefaultValue(RegionBindingsConstRef B,
const MemRegion *R, QualType T);
/// ArrayToPointer - Emulates the "decay" of an array to a pointer
/// type. 'Array' represents the lvalue of the array being decayed
/// to a pointer, and the returned SVal represents the decayed
/// version of that lvalue (i.e., a pointer to the first element of
/// the array). This is called by ExprEngine when evaluating
/// casts from arrays to pointers.
SVal ArrayToPointer(Loc Array, QualType ElementTy) override;
StoreRef getInitialStore(const LocationContext *InitLoc) override {
return StoreRef(RBFactory.getEmptyMap().getRootWithoutRetain(), *this);
}
//===-------------------------------------------------------------------===//
// Binding values to regions.
//===-------------------------------------------------------------------===//
RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K,
const Expr *Ex,
unsigned Count,
const LocationContext *LCtx,
RegionBindingsRef B,
InvalidatedRegions *Invalidated);
StoreRef invalidateRegions(Store store,
ArrayRef<SVal> Values,
const Expr *E, unsigned Count,
const LocationContext *LCtx,
const CallEvent *Call,
InvalidatedSymbols &IS,
RegionAndSymbolInvalidationTraits &ITraits,
InvalidatedRegions *Invalidated,
InvalidatedRegions *InvalidatedTopLevel) override;
bool scanReachableSymbols(Store S, const MemRegion *R,
ScanReachableSymbols &Callbacks) override;
RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B,
const SubRegion *R);
public: // Part of public interface to class.
StoreRef Bind(Store store, Loc LV, SVal V) override {
return StoreRef(bind(getRegionBindings(store), LV, V).asStore(), *this);
}
RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V);
// BindDefault is only used to initialize a region with a default value.
StoreRef BindDefault(Store store, const MemRegion *R, SVal V) override {
+ // FIXME: The offsets of empty bases can be tricky because of
+ // the so-called "empty base class optimization".
+ // If a base class has been optimized out
+ // we should not try to create a binding, otherwise we should.
+ // Unfortunately, at the moment ASTRecordLayout doesn't expose
+ // the actual sizes of the empty bases
+ // and trying to infer them from offsets/alignments
+ // seems to be error-prone and non-trivial because of the trailing padding.
+ // As a temporary mitigation we don't create bindings for empty bases.
+ if (R->getKind() == MemRegion::CXXBaseObjectRegionKind &&
+ cast<CXXBaseObjectRegion>(R)->getDecl()->isEmpty())
+ return StoreRef(store, *this);
+
RegionBindingsRef B = getRegionBindings(store);
assert(!B.lookup(R, BindingKey::Direct));
BindingKey Key = BindingKey::Make(R, BindingKey::Default);
if (B.lookup(Key)) {
const SubRegion *SR = cast<SubRegion>(R);
assert(SR->getAsOffset().getOffset() ==
SR->getSuperRegion()->getAsOffset().getOffset() &&
"A default value must come from a super-region");
B = removeSubRegionBindings(B, SR);
} else {
B = B.addBinding(Key, V);
}
return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
}
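// A minimal, self-contained illustration of the empty base class optimization
// that the FIXME above refers to (assuming a typical Itanium-ABI layout;
// other ABIs may differ):
//
//   struct Empty {};
//   struct Derived : Empty { int x; };
//   static_assert(sizeof(Derived) == sizeof(int),
//                 "the Empty base contributes no storage of its own");
//
// Because the base subobject may share its address with 'Derived::x', a
// binding created for the optimized-out base could clobber the field.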
/// Attempt to extract the fields of \p LCV and bind them to the struct region
/// \p R.
///
/// This path is used when it seems advantageous to "force" loading the values
/// within a LazyCompoundVal to bind memberwise to the struct region, rather
/// than using a Default binding at the base of the entire region. This is a
/// heuristic attempting to avoid building long chains of LazyCompoundVals.
///
/// \returns The updated store bindings, or \c None if binding non-lazily
/// would be too expensive.
Optional<RegionBindingsRef> tryBindSmallStruct(RegionBindingsConstRef B,
const TypedValueRegion *R,
const RecordDecl *RD,
nonloc::LazyCompoundVal LCV);
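// For instance (editorial sketch; the default limit of 2 comes from the
// constructor above): copying
//   struct Point { int x, y; } a, b;
//   b = a;
// may bind 'b.x' and 'b.y' memberwise, while a struct with more fields than
// the limit keeps a single LazyCompoundVal default binding at the base of 'b'.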
/// BindStruct - Bind a compound value to a structure.
RegionBindingsRef bindStruct(RegionBindingsConstRef B,
const TypedValueRegion* R, SVal V);
/// BindVector - Bind a compound value to a vector.
RegionBindingsRef bindVector(RegionBindingsConstRef B,
const TypedValueRegion* R, SVal V);
RegionBindingsRef bindArray(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V);
/// Clears out all bindings in the given region and assigns a new value
/// as a Default binding.
RegionBindingsRef bindAggregate(RegionBindingsConstRef B,
const TypedRegion *R,
SVal DefaultVal);
/// \brief Create a new store with the specified binding removed.
/// \param ST the original store, that is the basis for the new store.
/// \param L the location whose binding should be removed.
StoreRef killBinding(Store ST, Loc L) override;
void incrementReferenceCount(Store store) override {
getRegionBindings(store).manualRetain();
}
/// If the StoreManager supports it, decrement the reference count of
/// the specified Store object. If the reference count hits 0, the memory
/// associated with the object is recycled.
void decrementReferenceCount(Store store) override {
getRegionBindings(store).manualRelease();
}
bool includedInBindings(Store store, const MemRegion *region) const override;
/// \brief Return the value bound to specified location in a given state.
///
/// The high level logic for this method is this:
/// getBinding (L)
/// if L has binding
/// return L's binding
/// else if L is in killset
/// return unknown
/// else
/// if L is on stack or heap
/// return undefined
/// else
/// return symbolic
SVal getBinding(Store S, Loc L, QualType T) override {
return getBinding(getRegionBindings(S), L, T);
}
Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region, so look up the
// base region's default binding; otherwise the lookup will fail when R
// is at an offset from R->getBaseRegion().
return B.getDefaultBinding(R->getBaseRegion());
}
SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());
SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R);
SVal getBindingForField(RegionBindingsConstRef B, const FieldRegion *R);
SVal getBindingForObjCIvar(RegionBindingsConstRef B, const ObjCIvarRegion *R);
SVal getBindingForVar(RegionBindingsConstRef B, const VarRegion *R);
SVal getBindingForLazySymbol(const TypedValueRegion *R);
SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
const TypedValueRegion *R,
QualType Ty);
SVal getLazyBinding(const SubRegion *LazyBindingRegion,
RegionBindingsRef LazyBinding);
/// Get bindings for the values in a struct and return a CompoundVal, used
/// when doing struct copy:
/// struct s x, y;
/// x = y;
/// y's value is retrieved by this method.
SVal getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R);
SVal getBindingForArray(RegionBindingsConstRef B, const TypedValueRegion *R);
NonLoc createLazyBinding(RegionBindingsConstRef B, const TypedValueRegion *R);
/// Used to lazily generate derived symbols for bindings that are defined
/// implicitly by default bindings in a super region.
///
/// Note that callers may need to specially handle LazyCompoundVals, which
/// are returned as is in case the caller needs to treat them differently.
Optional<SVal> getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
const MemRegion *superR,
const TypedValueRegion *R,
QualType Ty);
/// Get the state and region whose binding this region \p R corresponds to.
///
/// If there is no lazy binding for \p R, the returned value will have a null
/// \c second. Note that a null pointer can represent a valid Store.
std::pair<Store, const SubRegion *>
findLazyBinding(RegionBindingsConstRef B, const SubRegion *R,
const SubRegion *originalRegion);
/// Returns the cached set of interesting SVals contained within a lazy
/// binding.
///
/// The precise value of "interesting" is determined for the purposes of
/// RegionStore's internal analysis. It must always contain all regions and
/// symbols, but may omit constants and other kinds of SVal.
const SValListTy &getInterestingValues(nonloc::LazyCompoundVal LCV);
//===------------------------------------------------------------------===//
// State pruning.
//===------------------------------------------------------------------===//
/// removeDeadBindings - Scans the RegionStore of 'state' for dead values.
/// It returns a new Store with these values removed.
StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx,
SymbolReaper& SymReaper) override;
//===------------------------------------------------------------------===//
// Region "extents".
//===------------------------------------------------------------------===//
// FIXME: This method will soon be eliminated; see the note in Store.h.
DefinedOrUnknownSVal getSizeInElements(ProgramStateRef state,
const MemRegion* R,
QualType EleTy) override;
//===------------------------------------------------------------------===//
// Utility methods.
//===------------------------------------------------------------------===//
RegionBindingsRef getRegionBindings(Store store) const {
return RegionBindingsRef(CBFactory,
static_cast<const RegionBindings::TreeTy*>(store),
RBFactory.getTreeFactory());
}
void print(Store store, raw_ostream &Out, const char* nl,
const char *sep) override;
void iterBindings(Store store, BindingsHandler& f) override {
RegionBindingsRef B = getRegionBindings(store);
for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) {
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
const BindingKey &K = CI.getKey();
if (!K.isDirect())
continue;
if (const SubRegion *R = dyn_cast<SubRegion>(K.getRegion())) {
// FIXME: Possibly incorporate the offset?
if (!f.HandleBinding(*this, store, R, CI.getData()))
return;
}
}
}
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// RegionStore creation.
//===----------------------------------------------------------------------===//
std::unique_ptr<StoreManager>
ento::CreateRegionStoreManager(ProgramStateManager &StMgr) {
RegionStoreFeatures F = maximal_features_tag();
return llvm::make_unique<RegionStoreManager>(StMgr, F);
}
std::unique_ptr<StoreManager>
ento::CreateFieldsOnlyRegionStoreManager(ProgramStateManager &StMgr) {
RegionStoreFeatures F = minimal_features_tag();
F.enableFields(true);
return llvm::make_unique<RegionStoreManager>(StMgr, F);
}
//===----------------------------------------------------------------------===//
// Region Cluster analysis.
//===----------------------------------------------------------------------===//
namespace {
/// Used to determine which global regions are automatically included in the
/// initial worklist of a ClusterAnalysis.
enum GlobalsFilterKind {
/// Don't include any global regions.
GFK_None,
/// Only include system globals.
GFK_SystemOnly,
/// Include all global regions.
GFK_All
};
template <typename DERIVED>
class ClusterAnalysis {
protected:
typedef llvm::DenseMap<const MemRegion *, const ClusterBindings *> ClusterMap;
typedef const MemRegion * WorkListElement;
typedef SmallVector<WorkListElement, 10> WorkList;
llvm::SmallPtrSet<const ClusterBindings *, 16> Visited;
WorkList WL;
RegionStoreManager &RM;
ASTContext &Ctx;
SValBuilder &svalBuilder;
RegionBindingsRef B;
protected:
const ClusterBindings *getCluster(const MemRegion *R) {
return B.lookup(R);
}
/// Returns true if all clusters in the given memspace should be initially
/// included in the cluster analysis. Subclasses may provide their
/// own implementation.
bool includeEntireMemorySpace(const MemRegion *Base) {
return false;
}
public:
ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr,
RegionBindingsRef b)
: RM(rm), Ctx(StateMgr.getContext()),
svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {}
RegionBindingsRef getRegionBindings() const { return B; }
bool isVisited(const MemRegion *R) {
return Visited.count(getCluster(R));
}
void GenerateClusters() {
// Scan the entire set of bindings and record the region clusters.
for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end();
RI != RE; ++RI){
const MemRegion *Base = RI.getKey();
const ClusterBindings &Cluster = RI.getData();
assert(!Cluster.isEmpty() && "Empty clusters should be removed");
static_cast<DERIVED*>(this)->VisitAddedToCluster(Base, Cluster);
// If the base's memspace should be entirely invalidated, add the cluster
// to the work list up front.
if (static_cast<DERIVED*>(this)->includeEntireMemorySpace(Base))
AddToWorkList(WorkListElement(Base), &Cluster);
}
}
bool AddToWorkList(WorkListElement E, const ClusterBindings *C) {
if (C && !Visited.insert(C).second)
return false;
WL.push_back(E);
return true;
}
bool AddToWorkList(const MemRegion *R) {
return static_cast<DERIVED*>(this)->AddToWorkList(R);
}
void RunWorkList() {
while (!WL.empty()) {
WorkListElement E = WL.pop_back_val();
const MemRegion *BaseR = E;
static_cast<DERIVED*>(this)->VisitCluster(BaseR, getCluster(BaseR));
}
}
void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C) {}
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C) {}
void VisitCluster(const MemRegion *BaseR, const ClusterBindings *C,
bool Flag) {
static_cast<DERIVED*>(this)->VisitCluster(BaseR, C);
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Binding invalidation.
//===----------------------------------------------------------------------===//
bool RegionStoreManager::scanReachableSymbols(Store S, const MemRegion *R,
ScanReachableSymbols &Callbacks) {
assert(R == R->getBaseRegion() && "Should only be called for base regions");
RegionBindingsRef B = getRegionBindings(S);
const ClusterBindings *Cluster = B.lookup(R);
if (!Cluster)
return true;
for (ClusterBindings::iterator RI = Cluster->begin(), RE = Cluster->end();
RI != RE; ++RI) {
if (!Callbacks.scan(RI.getData()))
return false;
}
return true;
}
static inline bool isUnionField(const FieldRegion *FR) {
return FR->getDecl()->getParent()->isUnion();
}
typedef SmallVector<const FieldDecl *, 8> FieldVector;
static void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) {
assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
const MemRegion *Base = K.getConcreteOffsetRegion();
const MemRegion *R = K.getRegion();
while (R != Base) {
if (const FieldRegion *FR = dyn_cast<FieldRegion>(R))
if (!isUnionField(FR))
Fields.push_back(FR->getDecl());
R = cast<SubRegion>(R)->getSuperRegion();
}
}
static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) {
assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
if (Fields.empty())
return true;
FieldVector FieldsInBindingKey;
getSymbolicOffsetFields(K, FieldsInBindingKey);
ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size();
if (Delta >= 0)
return std::equal(FieldsInBindingKey.begin() + Delta,
FieldsInBindingKey.end(),
Fields.begin());
else
return std::equal(FieldsInBindingKey.begin(), FieldsInBindingKey.end(),
Fields.begin() - Delta);
}
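// Example (hedged, informal notation): if the region being invalidated is
// 'points[i].y', the field suffix to match is [y]. A symbolic-offset key for
// 'points[j].y' produces the same suffix and is compatible; a key for
// 'points[j].x' produces [x] and is not.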
/// Collects all bindings in \p Cluster that may refer to bindings within
/// \p Top.
///
/// Each binding is a pair whose \c first is the key (a BindingKey) and whose
/// \c second is the value (an SVal).
///
/// The \p IncludeAllDefaultBindings parameter specifies whether to include
/// default bindings that may extend beyond \p Top itself, e.g. if \p Top is
/// an aggregate within a larger aggregate with a default binding.
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
SValBuilder &SVB, const ClusterBindings &Cluster,
const SubRegion *Top, BindingKey TopKey,
bool IncludeAllDefaultBindings) {
FieldVector FieldsInSymbolicSubregions;
if (TopKey.hasSymbolicOffset()) {
getSymbolicOffsetFields(TopKey, FieldsInSymbolicSubregions);
Top = cast<SubRegion>(TopKey.getConcreteOffsetRegion());
TopKey = BindingKey::Make(Top, BindingKey::Default);
}
// Find the length (in bits) of the region being invalidated.
uint64_t Length = UINT64_MAX;
SVal Extent = Top->getExtent(SVB);
if (Optional<nonloc::ConcreteInt> ExtentCI =
Extent.getAs<nonloc::ConcreteInt>()) {
const llvm::APSInt &ExtentInt = ExtentCI->getValue();
assert(ExtentInt.isNonNegative() || ExtentInt.isUnsigned());
// Extents are in bytes but region offsets are in bits. Be careful!
Length = ExtentInt.getLimitedValue() * SVB.getContext().getCharWidth();
} else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Top)) {
if (FR->getDecl()->isBitField())
Length = FR->getDecl()->getBitWidthValue(SVB.getContext());
}
for (ClusterBindings::iterator I = Cluster.begin(), E = Cluster.end();
I != E; ++I) {
BindingKey NextKey = I.getKey();
if (NextKey.getRegion() == TopKey.getRegion()) {
// FIXME: This doesn't catch the case where we're really invalidating a
// region with a symbolic offset. Example:
// R: points[i].y
// Next: points[0].x
if (NextKey.getOffset() > TopKey.getOffset() &&
NextKey.getOffset() - TopKey.getOffset() < Length) {
// Case 1: The next binding is inside the region we're invalidating.
// Include it.
Bindings.push_back(*I);
} else if (NextKey.getOffset() == TopKey.getOffset()) {
// Case 2: The next binding is at the same offset as the region we're
// invalidating. In this case, we need to leave default bindings alone,
// since they may be providing a default value for regions beyond what
// we're invalidating.
// FIXME: This is probably incorrect; consider invalidating an outer
// struct whose first field is bound to a LazyCompoundVal.
if (IncludeAllDefaultBindings || NextKey.isDirect())
Bindings.push_back(*I);
}
} else if (NextKey.hasSymbolicOffset()) {
const MemRegion *Base = NextKey.getConcreteOffsetRegion();
if (Top->isSubRegionOf(Base)) {
// Case 3: The next key is symbolic and we just changed something within
// its concrete region. We don't know if the binding is still valid, so
// we'll be conservative and include it.
if (IncludeAllDefaultBindings || NextKey.isDirect())
if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
Bindings.push_back(*I);
} else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
// Case 4: The next key is symbolic, but we changed a known
// super-region. In this case the binding is certainly included.
if (Top == Base || BaseSR->isSubRegionOf(Top))
if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
Bindings.push_back(*I);
}
}
}
}
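// A small worked example (editorial; informal notation). Suppose \p Top is
// 'outer.inner', where 'inner' is the first member, and the cluster holds:
//   (outer, offset 0, default)  -- same offset as Top: collected only when it
//                                  is direct or IncludeAllDefaultBindings
//   (outer.inner.a, direct)     -- strictly inside Top: always collected
//   (outer.later, direct)       -- beyond Top's extent: skipped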
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
SValBuilder &SVB, const ClusterBindings &Cluster,
const SubRegion *Top, bool IncludeAllDefaultBindings) {
collectSubRegionBindings(Bindings, SVB, Cluster, Top,
BindingKey::Make(Top, BindingKey::Default),
IncludeAllDefaultBindings);
}
RegionBindingsRef
RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B,
const SubRegion *Top) {
BindingKey TopKey = BindingKey::Make(Top, BindingKey::Default);
const MemRegion *ClusterHead = TopKey.getBaseRegion();
if (Top == ClusterHead) {
// We can remove an entire cluster's bindings all in one go.
return B.remove(Top);
}
const ClusterBindings *Cluster = B.lookup(ClusterHead);
if (!Cluster) {
// If we're invalidating a region with a symbolic offset, we need to make
// sure we don't treat the base region as uninitialized anymore.
if (TopKey.hasSymbolicOffset()) {
const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
return B.addBinding(Concrete, BindingKey::Default, UnknownVal());
}
return B;
}
SmallVector<BindingPair, 32> Bindings;
collectSubRegionBindings(Bindings, svalBuilder, *Cluster, Top, TopKey,
/*IncludeAllDefaultBindings=*/false);
ClusterBindingsRef Result(*Cluster, CBFactory);
for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
E = Bindings.end();
I != E; ++I)
Result = Result.remove(I->first);
// If we're invalidating a region with a symbolic offset, we need to make sure
// we don't treat the base region as uninitialized anymore.
// FIXME: This isn't very precise; see the example in
// collectSubRegionBindings.
if (TopKey.hasSymbolicOffset()) {
const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
Result = Result.add(BindingKey::Make(Concrete, BindingKey::Default),
UnknownVal());
}
if (Result.isEmpty())
return B.remove(ClusterHead);
return B.add(ClusterHead, Result.asImmutableMap());
}
namespace {
class invalidateRegionsWorker : public ClusterAnalysis<invalidateRegionsWorker>
{
const Expr *Ex;
unsigned Count;
const LocationContext *LCtx;
InvalidatedSymbols &IS;
RegionAndSymbolInvalidationTraits &ITraits;
StoreManager::InvalidatedRegions *Regions;
GlobalsFilterKind GlobalsFilter;
public:
invalidateRegionsWorker(RegionStoreManager &rm,
ProgramStateManager &stateMgr,
RegionBindingsRef b,
const Expr *ex, unsigned count,
const LocationContext *lctx,
InvalidatedSymbols &is,
RegionAndSymbolInvalidationTraits &ITraitsIn,
StoreManager::InvalidatedRegions *r,
GlobalsFilterKind GFK)
: ClusterAnalysis<invalidateRegionsWorker>(rm, stateMgr, b),
Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
GlobalsFilter(GFK) {}
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
void VisitBinding(SVal V);
using ClusterAnalysis::AddToWorkList;
bool AddToWorkList(const MemRegion *R);
/// Returns true if all clusters in the memory space for \p Base should
/// be invalidated.
bool includeEntireMemorySpace(const MemRegion *Base);
/// Returns true if the memory space of the given region is one of the global
/// regions specially included at the start of invalidation.
bool isInitiallyIncludedGlobalRegion(const MemRegion *R);
};
} // end anonymous namespace
bool invalidateRegionsWorker::AddToWorkList(const MemRegion *R) {
bool doNotInvalidateSuperRegion = ITraits.hasTrait(
R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
const MemRegion *BaseR = doNotInvalidateSuperRegion ? R : R->getBaseRegion();
return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}
void invalidateRegionsWorker::VisitBinding(SVal V) {
// A symbol? Mark it touched by the invalidation.
if (SymbolRef Sym = V.getAsSymbol())
IS.insert(Sym);
if (const MemRegion *R = V.getAsRegion()) {
AddToWorkList(R);
return;
}
// Is it a LazyCompoundVal? All references get invalidated as well.
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);
for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(),
E = Vals.end();
I != E; ++I)
VisitBinding(*I);
return;
}
}
void invalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
const ClusterBindings *C) {
bool PreserveRegionsContents =
ITraits.hasTrait(baseR,
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
if (C) {
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I)
VisitBinding(I.getData());
// Invalidate the region's contents.
if (!PreserveRegionsContents)
B = B.remove(baseR);
}
// BlockDataRegion? If so, invalidate captured variables that are passed
// by reference.
if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(baseR)) {
for (BlockDataRegion::referenced_vars_iterator
BI = BR->referenced_vars_begin(), BE = BR->referenced_vars_end();
BI != BE; ++BI) {
const VarRegion *VR = BI.getCapturedRegion();
const VarDecl *VD = VR->getDecl();
if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) {
AddToWorkList(VR);
}
else if (Loc::isLocType(VR->getValueType())) {
// Map the current bindings to a Store to retrieve the value
// of the binding. If that binding itself is a region, we should
// invalidate that region. This is because a block may capture
// a pointer value, but the thing pointed to by that pointer may
// get invalidated.
SVal V = RM.getBinding(B, loc::MemRegionVal(VR));
if (Optional<Loc> L = V.getAs<Loc>()) {
if (const MemRegion *LR = L->getAsRegion())
AddToWorkList(LR);
}
}
}
return;
}
// Symbolic region?
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR))
IS.insert(SR->getSymbol());
// Nothing else should be done in the case when we preserve the region's contents.
if (PreserveRegionsContents)
return;
// Otherwise, we have a normal data region. Record that we touched the region.
if (Regions)
Regions->push_back(baseR);
if (isa<AllocaRegion>(baseR) || isa<SymbolicRegion>(baseR)) {
// Invalidate the region by setting its default value to a
// conjured symbol. The type of the symbol is irrelevant.
DefinedOrUnknownSVal V =
svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
if (!baseR->isBoundable())
return;
const TypedValueRegion *TR = cast<TypedValueRegion>(baseR);
QualType T = TR->getValueType();
if (isInitiallyIncludedGlobalRegion(baseR)) {
// If the region is a global and we are invalidating all globals,
// erasing the entry is good enough. This causes all globals to be lazily
// symbolicated from the same base symbol.
return;
}
if (T->isStructureOrClassType()) {
// Invalidate the region by setting its default value to a
// conjured symbol. The type of the symbol is irrelevant.
DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
Ctx.IntTy, Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
if (const ArrayType *AT = Ctx.getAsArrayType(T)) {
bool doNotInvalidateSuperRegion = ITraits.hasTrait(
baseR,
RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
if (doNotInvalidateSuperRegion) {
// We are not doing blank invalidation of the whole array region so we
// have to manually invalidate each element.
Optional<uint64_t> NumElements;
// Compute lower and upper offsets for region within array.
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
NumElements = CAT->getSize().getZExtValue();
if (!NumElements) // We are not dealing with a constant size array
goto conjure_default;
QualType ElementTy = AT->getElementType();
uint64_t ElemSize = Ctx.getTypeSize(ElementTy);
const RegionOffset &RO = baseR->getAsOffset();
const MemRegion *SuperR = baseR->getBaseRegion();
if (RO.hasSymbolicOffset()) {
// If the base region has a symbolic offset,
// we revert to invalidating the super region.
if (SuperR)
AddToWorkList(SuperR);
goto conjure_default;
}
uint64_t LowerOffset = RO.getOffset();
uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize;
bool UpperOverflow = UpperOffset < LowerOffset;
// Invalidate regions which are within array boundaries,
// or have a symbolic offset.
if (!SuperR)
goto conjure_default;
const ClusterBindings *C = B.lookup(SuperR);
if (!C)
goto conjure_default;
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E;
++I) {
const BindingKey &BK = I.getKey();
Optional<uint64_t> ROffset =
BK.hasSymbolicOffset() ? Optional<uint64_t>() : BK.getOffset();
// Check that the offset is not symbolic and is within the array's boundaries.
// Handles arrays of 0 elements and of 0-sized elements as well.
if (!ROffset ||
((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
(UpperOverflow &&
(*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
(LowerOffset == UpperOffset && *ROffset == LowerOffset))) {
B = B.removeBinding(I.getKey());
// Bound symbolic regions need to be invalidated for dead symbol
// detection.
SVal V = I.getData();
const MemRegion *R = V.getAsRegion();
if (R && isa<SymbolicRegion>(R))
VisitBinding(V);
}
}
}
conjure_default:
// Set the default value of the array to a conjured symbol.
DefinedOrUnknownSVal V =
svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
AT->getElementType(), Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
T, Count);
assert(SymbolManager::canSymbolicate(T) || V.isUnknown());
B = B.addBinding(baseR, BindingKey::Direct, V);
}
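// Worked example (editorial; a 32-bit 'int' is assumed) for the
// TK_DoNotInvalidateSuperRegion path above: for 'int a[2]' the bounds are
// LowerOffset == 0 and UpperOffset == 2 * 32 == 64 bits, so a binding for
// a[1] at bit offset 32 is removed, while a sibling binding in the same
// super-region at offset 64 or beyond survives; the array itself then
// receives a conjured default value.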
bool invalidateRegionsWorker::isInitiallyIncludedGlobalRegion(
const MemRegion *R) {
switch (GlobalsFilter) {
case GFK_None:
return false;
case GFK_SystemOnly:
return isa<GlobalSystemSpaceRegion>(R->getMemorySpace());
case GFK_All:
return isa<NonStaticGlobalSpaceRegion>(R->getMemorySpace());
}
llvm_unreachable("unknown globals filter");
}
bool invalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) {
if (isInitiallyIncludedGlobalRegion(Base))
return true;
const MemSpaceRegion *MemSpace = Base->getMemorySpace();
return ITraits.hasTrait(MemSpace,
RegionAndSymbolInvalidationTraits::TK_EntireMemSpace);
}
RegionBindingsRef
RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
const Expr *Ex,
unsigned Count,
const LocationContext *LCtx,
RegionBindingsRef B,
InvalidatedRegions *Invalidated) {
// Bind the globals memory space to a new symbol that we will use to derive
// the bindings for all globals.
const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
SVal V = svalBuilder.conjureSymbolVal(/* SymbolTag = */ (const void*) GS, Ex, LCtx,
/* type does not matter */ Ctx.IntTy,
Count);
B = B.removeBinding(GS)
.addBinding(BindingKey::Make(GS, BindingKey::Default), V);
// Even if there are no bindings in the global scope, we still need to
// record that we touched it.
if (Invalidated)
Invalidated->push_back(GS);
return B;
}
void RegionStoreManager::populateWorkList(invalidateRegionsWorker &W,
ArrayRef<SVal> Values,
InvalidatedRegions *TopLevelRegions) {
for (ArrayRef<SVal>::iterator I = Values.begin(),
E = Values.end(); I != E; ++I) {
SVal V = *I;
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const SValListTy &Vals = getInterestingValues(*LCS);
for (SValListTy::const_iterator I = Vals.begin(),
E = Vals.end(); I != E; ++I) {
// Note: the last argument is false here because these are
// non-top-level regions.
if (const MemRegion *R = (*I).getAsRegion())
W.AddToWorkList(R);
}
continue;
}
if (const MemRegion *R = V.getAsRegion()) {
if (TopLevelRegions)
TopLevelRegions->push_back(R);
W.AddToWorkList(R);
continue;
}
}
}
StoreRef
RegionStoreManager::invalidateRegions(Store store,
ArrayRef<SVal> Values,
const Expr *Ex, unsigned Count,
const LocationContext *LCtx,
const CallEvent *Call,
InvalidatedSymbols &IS,
RegionAndSymbolInvalidationTraits &ITraits,
InvalidatedRegions *TopLevelRegions,
InvalidatedRegions *Invalidated) {
GlobalsFilterKind GlobalsFilter;
if (Call) {
if (Call->isInSystemHeader())
GlobalsFilter = GFK_SystemOnly;
else
GlobalsFilter = GFK_All;
} else {
GlobalsFilter = GFK_None;
}
RegionBindingsRef B = getRegionBindings(store);
invalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits,
Invalidated, GlobalsFilter);
// Scan the bindings and generate the clusters.
W.GenerateClusters();
// Add the regions to the worklist.
populateWorkList(W, Values, TopLevelRegions);
W.RunWorkList();
// Return the new bindings.
B = W.getRegionBindings();
// For calls, determine which global regions should be invalidated and
// invalidate them. (Note that function-static and immutable globals are never
// invalidated by this.)
// TODO: This could possibly be more precise with modules.
switch (GlobalsFilter) {
case GFK_All:
B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind,
Ex, Count, LCtx, B, Invalidated);
// FALLTHROUGH
case GFK_SystemOnly:
B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
Ex, Count, LCtx, B, Invalidated);
// FALLTHROUGH
case GFK_None:
break;
}
return StoreRef(B.asStore(), *this);
}
//===----------------------------------------------------------------------===//
// Extents for regions.
//===----------------------------------------------------------------------===//
DefinedOrUnknownSVal
RegionStoreManager::getSizeInElements(ProgramStateRef state,
const MemRegion *R,
QualType EleTy) {
SVal Size = cast<SubRegion>(R)->getExtent(svalBuilder);
const llvm::APSInt *SizeInt = svalBuilder.getKnownValue(state, Size);
if (!SizeInt)
return UnknownVal();
CharUnits RegionSize = CharUnits::fromQuantity(SizeInt->getSExtValue());
if (Ctx.getAsVariableArrayType(EleTy)) {
// FIXME: We need to track extra state to properly record the size
// of VLAs. Returning UnknownVal here, however, is a stop-gap so that
// we don't have a divide-by-zero below.
return UnknownVal();
}
CharUnits EleSize = Ctx.getTypeSizeInChars(EleTy);
// If a variable is reinterpreted as a type that doesn't fit into a larger
// type evenly, round it down.
// This is a signed value, since it's used in arithmetic with signed indices.
return svalBuilder.makeIntVal(RegionSize / EleSize, false);
}
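// Worked example (editorial; a 4-byte 'int' is assumed): viewing
// 'char buf[10]' as an array of 'int' gives RegionSize == 10 bytes and
// EleSize == 4 bytes, so the result is 10 / 4 == 2 -- the CharUnits division
// rounds down, as the comment above notes.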
//===----------------------------------------------------------------------===//
// Location and region casting.
//===----------------------------------------------------------------------===//
/// ArrayToPointer - Emulates the "decay" of an array to a pointer
/// type. 'Array' represents the lvalue of the array being decayed
/// to a pointer, and the returned SVal represents the decayed
/// version of that lvalue (i.e., a pointer to the first element of
/// the array). This is called by ExprEngine when evaluating casts
/// from arrays to pointers.
SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) {
if (Array.getAs<loc::ConcreteInt>())
return Array;
if (!Array.getAs<loc::MemRegionVal>())
return UnknownVal();
const SubRegion *R =
cast<SubRegion>(Array.castAs<loc::MemRegionVal>().getRegion());
NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex();
return loc::MemRegionVal(MRMgr.getElementRegion(T, ZeroIdx, R, Ctx));
}
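// For instance (illustrative): given 'int a[4];', evaluating the decay in
// 'int *p = a;' turns the lvalue of 'a' into the element lvalue '&a[0]',
// i.e. an ElementRegion with index 0 over the region for 'a'.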
//===----------------------------------------------------------------------===//
// Loading values from regions.
//===----------------------------------------------------------------------===//
SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) {
assert(!L.getAs<UnknownVal>() && "location unknown");
assert(!L.getAs<UndefinedVal>() && "location undefined");
// For access to concrete addresses, return UnknownVal. Checks
// for null dereferences (and similar errors) are done by checkers, not
// the Store.
// FIXME: We can consider lazily symbolicating such memory, but we really
// should defer this until we can reason easily about symbolicating arrays
// of bytes.
if (L.getAs<loc::ConcreteInt>()) {
return UnknownVal();
}
if (!L.getAs<loc::MemRegionVal>()) {
return UnknownVal();
}
const MemRegion *MR = L.castAs<loc::MemRegionVal>().getRegion();
if (isa<BlockDataRegion>(MR)) {
return UnknownVal();
}
if (isa<AllocaRegion>(MR) ||
isa<SymbolicRegion>(MR) ||
isa<CodeTextRegion>(MR)) {
if (T.isNull()) {
if (const TypedRegion *TR = dyn_cast<TypedRegion>(MR))
T = TR->getLocationType();
else {
const SymbolicRegion *SR = cast<SymbolicRegion>(MR);
T = SR->getSymbol()->getType();
}
}
MR = GetElementZeroRegion(cast<SubRegion>(MR), T);
}
// FIXME: Perhaps this method should just take a 'const MemRegion*' argument
// instead of 'Loc', and have the other Loc cases handled at a higher level.
const TypedValueRegion *R = cast<TypedValueRegion>(MR);
QualType RTy = R->getValueType();
// FIXME: we do not yet model the parts of a complex type, so treat the
// whole thing as "unknown".
if (RTy->isAnyComplexType())
return UnknownVal();
// FIXME: We should eventually handle funny addressing. e.g.:
//
// int x = ...;
// int *p = &x;
// char *q = (char*) p;
// char c = *q; // returns the first byte of 'x'.
//
// Such funny addressing will occur due to layering of regions.
if (RTy->isStructureOrClassType())
return getBindingForStruct(B, R);
// FIXME: Handle unions.
if (RTy->isUnionType())
return createLazyBinding(B, R);
if (RTy->isArrayType()) {
if (RTy->isConstantArrayType())
return getBindingForArray(B, R);
else
return UnknownVal();
}
// FIXME: handle Vector types.
if (RTy->isVectorType())
return UnknownVal();
if (const FieldRegion* FR = dyn_cast<FieldRegion>(R))
return CastRetrievedVal(getBindingForField(B, FR), FR, T, false);
if (const ElementRegion* ER = dyn_cast<ElementRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the element type. Eventually we want to compose these values
// more intelligently. For example, an 'element' can encompass multiple
// bound regions (e.g., several bound bytes), or could be a subset of
// a larger value.
return CastRetrievedVal(getBindingForElement(B, ER), ER, T, false);
}
if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the ivar type. What we should model is stores to ivars
// that blow past the extent of the ivar. If the address of the ivar is
// reinterpreted, it is possible we stored a different value that could
// fit within the ivar. Either we need to cast these when storing them
// or reinterpret them lazily (as we do here).
return CastRetrievedVal(getBindingForObjCIvar(B, IVR), IVR, T, false);
}
if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the variable type. What we should model is stores to variables
// that blow past the extent of the variable. If the address of the
// variable is reinterpreted, it is possible we stored a different value
// that could fit within the variable. Either we need to cast these when
// storing them or reinterpret them lazily (as we do here).
return CastRetrievedVal(getBindingForVar(B, VR), VR, T, false);
}
const SVal *V = B.lookup(R, BindingKey::Direct);
// Check if the region has a binding.
if (V)
return *V;
// The location does not have a bound value. This means that it has
// the value it had upon its creation and/or entry to the analyzed
// function/method. These are either symbolic values or 'undefined'.
if (R->hasStackNonParametersStorage()) {
// All stack variables are considered to have undefined values
// upon creation. All heap allocated blocks are considered to
// have undefined values as well unless they are explicitly bound
// to specific values.
return UndefinedVal();
}
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
static QualType getUnderlyingType(const SubRegion *R) {
QualType RegionTy;
if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(R))
RegionTy = TVR->getValueType();
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
RegionTy = SR->getSymbol()->getType();
return RegionTy;
}
/// Checks to see if store \p B has a lazy binding for region \p R.
///
/// If \p AllowSubregionBindings is \c false, a lazy binding will be rejected
/// if there are additional bindings within \p R.
///
/// Note that unlike RegionStoreManager::findLazyBinding, this will not search
/// for lazy bindings for super-regions of \p R.
static Optional<nonloc::LazyCompoundVal>
getExistingLazyBinding(SValBuilder &SVB, RegionBindingsConstRef B,
const SubRegion *R, bool AllowSubregionBindings) {
Optional<SVal> V = B.getDefaultBinding(R);
if (!V)
return None;
Optional<nonloc::LazyCompoundVal> LCV = V->getAs<nonloc::LazyCompoundVal>();
if (!LCV)
return None;
// If the LCV is for a subregion, the types might not match, and we shouldn't
// reuse the binding.
QualType RegionTy = getUnderlyingType(R);
if (!RegionTy.isNull() &&
!RegionTy->isVoidPointerType()) {
QualType SourceRegionTy = LCV->getRegion()->getValueType();
if (!SVB.getContext().hasSameUnqualifiedType(RegionTy, SourceRegionTy))
return None;
}
if (!AllowSubregionBindings) {
// If there are any other bindings within this region, we shouldn't reuse
// the top-level binding.
SmallVector<BindingPair, 16> Bindings;
collectSubRegionBindings(Bindings, SVB, *B.lookup(R->getBaseRegion()), R,
/*IncludeAllDefaultBindings=*/true);
if (Bindings.size() > 1)
return None;
}
return *LCV;
}
std::pair<Store, const SubRegion *>
RegionStoreManager::findLazyBinding(RegionBindingsConstRef B,
const SubRegion *R,
const SubRegion *originalRegion) {
if (originalRegion != R) {
if (Optional<nonloc::LazyCompoundVal> V =
getExistingLazyBinding(svalBuilder, B, R, true))
return std::make_pair(V->getStore(), V->getRegion());
}
typedef std::pair<Store, const SubRegion *> StoreRegionPair;
StoreRegionPair Result = StoreRegionPair();
if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
Result = findLazyBinding(B, cast<SubRegion>(ER->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getElementRegionWithSuper(ER, Result.second);
} else if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) {
Result = findLazyBinding(B, cast<SubRegion>(FR->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getFieldRegionWithSuper(FR, Result.second);
} else if (const CXXBaseObjectRegion *BaseReg =
dyn_cast<CXXBaseObjectRegion>(R)) {
// C++ base object region is another kind of region that we should blast
// through to look for a lazy compound value. It is like a field region.
Result = findLazyBinding(B, cast<SubRegion>(BaseReg->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getCXXBaseObjectRegionWithSuper(BaseReg,
Result.second);
}
return Result;
}
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// We do not currently model bindings of the CompoundLiteralRegion.
if (isa<CompoundLiteralRegion>(R->getBaseRegion()))
return UnknownVal();
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
const MemRegion* superR = R->getSuperRegion();
// Check if the region is an element region of a string literal.
if (const StringRegion *StrR=dyn_cast<StringRegion>(superR)) {
// FIXME: Handle loads from strings where the literal is treated as
// an integer, e.g., *((unsigned int*)"hello")
QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
return UnknownVal();
const StringLiteral *Str = StrR->getStringLiteral();
SVal Idx = R->getIndex();
if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
int64_t i = CI->getValue().getSExtValue();
// Abort on string underrun. This can happen with arbitrary
// clients of getBindingForElement().
if (i < 0)
return UndefinedVal();
int64_t length = Str->getLength();
// Technically, only i == length is guaranteed to be null.
// However, such overflows should be caught before reaching this point;
// the only time such an access would be made is if a string literal was
// used to initialize a larger array.
char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
return svalBuilder.makeIntVal(c, T);
}
}
// Check for loads from a code text region. For such loads, just give up.
if (isa<CodeTextRegion>(superR))
return UnknownVal();
// Handle the case where we are indexing into a larger scalar object.
// For example, this handles:
// int x = ...
// char *y = &x;
// return *y;
// FIXME: This is a hack, and doesn't do anything really intelligent yet.
const RegionRawOffset &O = R->getAsArrayOffset();
// If we cannot reason about the offset, return an unknown value.
if (!O.getRegion())
return UnknownVal();
if (const TypedValueRegion *baseR =
dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
QualType baseT = baseR->getValueType();
if (baseT->isScalarType()) {
QualType elemT = R->getElementType();
if (elemT->isScalarType()) {
if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
if (SymbolRef parentSym = V->getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
if (V->isUnknownOrUndef())
return *V;
// Other cases: give up. We are indexing into a larger object
// that has some value, but we don't know how to handle that yet.
return UnknownVal();
}
}
}
}
}
return getBindingForFieldOrElementCommon(B, R, R->getElementType());
}
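// A hedged sketch of the string-literal path above:
//   const char s[8] = "hi";  // literal length == 2
//   char a = s[0];           // 'h': read from the literal itself
//   char b = s[2];           // '\0': i == length, the implicit terminator
//   char c = s[5];           // '\0': past the literal; the larger array the
//                            // literal initialized is zero-filled
// A negative index, by contrast, returns UndefinedVal (string underrun).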
SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
const FieldRegion* R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
QualType Ty = R->getValueType();
return getBindingForFieldOrElementCommon(B, R, Ty);
}
Optional<SVal>
RegionStoreManager::getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
const MemRegion *superR,
const TypedValueRegion *R,
QualType Ty) {
if (const Optional<SVal> &D = B.getDefaultBinding(superR)) {
const SVal &val = D.getValue();
if (SymbolRef parentSym = val.getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
if (val.isZeroConstant())
return svalBuilder.makeZeroVal(Ty);
if (val.isUnknownOrUndef())
return val;
// Lazy bindings are usually handled through getExistingLazyBinding().
// We should unify these two code paths at some point.
if (val.getAs<nonloc::LazyCompoundVal>() ||
val.getAs<nonloc::CompoundVal>())
return val;
llvm_unreachable("Unknown default value");
}
return None;
}
SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion,
RegionBindingsRef LazyBinding) {
SVal Result;
if (const ElementRegion *ER = dyn_cast<ElementRegion>(LazyBindingRegion))
Result = getBindingForElement(LazyBinding, ER);
else
Result = getBindingForField(LazyBinding,
cast<FieldRegion>(LazyBindingRegion));
// FIXME: This is a hack to deal with RegionStore's inability to distinguish a
// default value for /part/ of an aggregate from a default value for the
// /entire/ aggregate. The most common case of this is when struct Outer
// has as its first member a struct Inner, which is copied in from a stack
// variable. In this case, even if the Outer's default value is symbolic, 0,
// or unknown, it gets overridden by the Inner's default value of undefined.
//
// This is a general problem -- if the Inner is zero-initialized, the Outer
// will now look zero-initialized. The proper way to solve this is with a
// new version of RegionStore that tracks the extent of a binding as well
// as the offset.
//
// This hack only takes care of the undefined case because that can very
// quickly result in a warning.
if (Result.isUndef())
Result = UnknownVal();
return Result;
}
SVal
RegionStoreManager::getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
const TypedValueRegion *R,
QualType Ty) {
// At this point we have already checked in either getBindingForElement or
// getBindingForField if 'R' has a direct binding.
// Lazy binding?
Store lazyBindingStore = nullptr;
const SubRegion *lazyBindingRegion = nullptr;
std::tie(lazyBindingStore, lazyBindingRegion) = findLazyBinding(B, R, R);
if (lazyBindingRegion)
return getLazyBinding(lazyBindingRegion,
getRegionBindings(lazyBindingStore));
// Record whether or not we see a symbolic index. That can be completely
// out of the scope of our lookup.
bool hasSymbolicIndex = false;
// FIXME: This is a hack to deal with RegionStore's inability to distinguish a
// default value for /part/ of an aggregate from a default value for the
// /entire/ aggregate. The most common case of this is when struct Outer
// has as its first member a struct Inner, which is copied in from a stack
// variable. In this case, even if the Outer's default value is symbolic, 0,
// or unknown, it gets overridden by the Inner's default value of undefined.
//
// This is a general problem -- if the Inner is zero-initialized, the Outer
// will now look zero-initialized. The proper way to solve this is with a
// new version of RegionStore that tracks the extent of a binding as well
// as the offset.
//
// This hack only takes care of the undefined case because that can very
// quickly result in a warning.
bool hasPartialLazyBinding = false;
const SubRegion *SR = dyn_cast<SubRegion>(R);
while (SR) {
const MemRegion *Base = SR->getSuperRegion();
if (Optional<SVal> D = getBindingForDerivedDefaultValue(B, Base, R, Ty)) {
if (D->getAs<nonloc::LazyCompoundVal>()) {
hasPartialLazyBinding = true;
break;
}
return *D;
}
if (const ElementRegion *ER = dyn_cast<ElementRegion>(Base)) {
NonLoc index = ER->getIndex();
if (!index.isConstant())
hasSymbolicIndex = true;
}
// If our super region is a field or element itself, walk up the region
// hierarchy to see if there is a default value installed in an ancestor.
SR = dyn_cast<SubRegion>(Base);
}
if (R->hasStackNonParametersStorage()) {
if (isa<ElementRegion>(R)) {
// Currently we don't reason specially about Clang-style vectors. Check
// if superR is a vector and if so return Unknown.
if (const TypedValueRegion *typedSuperR =
dyn_cast<TypedValueRegion>(R->getSuperRegion())) {
if (typedSuperR->getValueType()->isVectorType())
return UnknownVal();
}
}
// FIXME: We also need to take ElementRegions with symbolic indexes into
// account. This case handles both directly accessing an ElementRegion
// with a symbolic offset and accessing fields within an element with
// a symbolic offset.
if (hasSymbolicIndex)
return UnknownVal();
if (!hasPartialLazyBinding)
return UndefinedVal();
}
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
SVal RegionStoreManager::getBindingForObjCIvar(RegionBindingsConstRef B,
const ObjCIvarRegion* R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
const MemRegion *superR = R->getSuperRegion();
// Check if the super region has a default binding.
if (const Optional<SVal> &V = B.getDefaultBinding(superR)) {
if (SymbolRef parentSym = V->getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
// Other cases: give up.
return UnknownVal();
}
return getBindingForLazySymbol(R);
}
SVal RegionStoreManager::getBindingForVar(RegionBindingsConstRef B,
const VarRegion *R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
// Lazily derive a value for the VarRegion.
const VarDecl *VD = R->getDecl();
const MemSpaceRegion *MS = R->getMemorySpace();
// Arguments are always symbolic.
if (isa<StackArgumentsSpaceRegion>(MS))
return svalBuilder.getRegionValueSymbolVal(R);
// Is 'VD' declared constant? If so, retrieve the constant value.
if (VD->getType().isConstQualified())
if (const Expr *Init = VD->getInit())
if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
return *V;
// This must come after the check for constants because closure-captured
// constant variables may appear in UnknownSpaceRegion.
if (isa<UnknownSpaceRegion>(MS))
return svalBuilder.getRegionValueSymbolVal(R);
if (isa<GlobalsSpaceRegion>(MS)) {
QualType T = VD->getType();
// Function-scoped static variables are default-initialized to 0; if they
// have an initializer, it would have been processed by now.
// FIXME: This is only true when we're starting analysis from main().
// We're losing a lot of coverage here.
if (isa<StaticGlobalSpaceRegion>(MS))
return svalBuilder.makeZeroVal(T);
if (Optional<SVal> V = getBindingForDerivedDefaultValue(B, MS, R, T)) {
assert(!V->getAs<nonloc::LazyCompoundVal>());
return V.getValue();
}
return svalBuilder.getRegionValueSymbolVal(R);
}
return UndefinedVal();
}
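
A compact, hypothetical sketch touching each branch above (names are illustrative only):

// Hypothetical analyzed code exercising getBindingForVar's cases.
const int kLimit = 42; // const-qualified with an initializer:
                       // getConstantVal(Init) yields 42
int g(int arg) {       // parameters live in StackArgumentsSpaceRegion:
                       // always a fresh symbol
  static int counter;  // function-scoped static: default-initialized to 0
  int uninit;          // stack local with no binding: UndefinedVal
  return arg + kLimit + counter + uninit;
}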
SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) {
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
const RegionStoreManager::SValListTy &
RegionStoreManager::getInterestingValues(nonloc::LazyCompoundVal LCV) {
// First, check the cache.
LazyBindingsMapTy::iterator I = LazyBindingsMap.find(LCV.getCVData());
if (I != LazyBindingsMap.end())
return I->second;
// If we don't have a list of values cached, start constructing it.
SValListTy List;
const SubRegion *LazyR = LCV.getRegion();
RegionBindingsRef B = getRegionBindings(LCV.getStore());
// If this region had /no/ bindings at the time, there are no interesting
// values to return.
const ClusterBindings *Cluster = B.lookup(LazyR->getBaseRegion());
if (!Cluster)
return (LazyBindingsMap[LCV.getCVData()] = std::move(List));
SmallVector<BindingPair, 32> Bindings;
collectSubRegionBindings(Bindings, svalBuilder, *Cluster, LazyR,
/*IncludeAllDefaultBindings=*/true);
for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
E = Bindings.end();
I != E; ++I) {
SVal V = I->second;
if (V.isUnknownOrUndef() || V.isConstant())
continue;
if (Optional<nonloc::LazyCompoundVal> InnerLCV =
V.getAs<nonloc::LazyCompoundVal>()) {
const SValListTy &InnerList = getInterestingValues(*InnerLCV);
List.insert(List.end(), InnerList.begin(), InnerList.end());
continue;
}
List.push_back(V);
}
return (LazyBindingsMap[LCV.getCVData()] = std::move(List));
}
NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
const TypedValueRegion *R) {
if (Optional<nonloc::LazyCompoundVal> V =
getExistingLazyBinding(svalBuilder, B, R, false))
return *V;
return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}
static bool isRecordEmpty(const RecordDecl *RD) {
if (!RD->field_empty())
return false;
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
return CRD->getNumBases() == 0;
return true;
}
SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
const TypedValueRegion *R) {
const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
if (!RD->getDefinition() || isRecordEmpty(RD))
return UnknownVal();
return createLazyBinding(B, R);
}
SVal RegionStoreManager::getBindingForArray(RegionBindingsConstRef B,
const TypedValueRegion *R) {
assert(Ctx.getAsConstantArrayType(R->getValueType()) &&
"Only constant array types can have compound bindings.");
return createLazyBinding(B, R);
}
bool RegionStoreManager::includedInBindings(Store store,
const MemRegion *region) const {
RegionBindingsRef B = getRegionBindings(store);
region = region->getBaseRegion();
// Quick path: if the base is the head of a cluster, the region is live.
if (B.lookup(region))
return true;
// Slow path: if the region is the VALUE of any binding, it is live.
for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI) {
const ClusterBindings &Cluster = RI.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
const SVal &D = CI.getData();
if (const MemRegion *R = D.getAsRegion())
if (R->getBaseRegion() == region)
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// Binding values to regions.
//===----------------------------------------------------------------------===//
StoreRef RegionStoreManager::killBinding(Store ST, Loc L) {
if (Optional<loc::MemRegionVal> LV = L.getAs<loc::MemRegionVal>())
if (const MemRegion* R = LV->getRegion())
return StoreRef(getRegionBindings(ST).removeBinding(R)
.asImmutableMap()
.getRootWithoutRetain(),
*this);
return StoreRef(ST, *this);
}
RegionBindingsRef
RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
if (L.getAs<loc::ConcreteInt>())
return B;
// If we get here, the location should be a region.
const MemRegion *R = L.castAs<loc::MemRegionVal>().getRegion();
// Check if the region is a struct region.
if (const TypedValueRegion* TR = dyn_cast<TypedValueRegion>(R)) {
QualType Ty = TR->getValueType();
if (Ty->isArrayType())
return bindArray(B, TR, V);
if (Ty->isStructureOrClassType())
return bindStruct(B, TR, V);
if (Ty->isVectorType())
return bindVector(B, TR, V);
if (Ty->isUnionType())
return bindAggregate(B, TR, V);
}
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
// Binding directly to a symbolic region should be treated as binding
// to element 0.
QualType T = SR->getSymbol()->getType();
if (T->isAnyPointerType() || T->isReferenceType())
T = T->getPointeeType();
R = GetElementZeroRegion(SR, T);
}
// Clear out bindings that may overlap with this binding.
RegionBindingsRef NewB = removeSubRegionBindings(B, cast<SubRegion>(R));
return NewB.addBinding(BindingKey::Make(R, BindingKey::Direct), V);
}
RegionBindingsRef
RegionStoreManager::setImplicitDefaultValue(RegionBindingsConstRef B,
const MemRegion *R,
QualType T) {
SVal V;
if (Loc::isLocType(T))
V = svalBuilder.makeNull();
else if (T->isIntegralOrEnumerationType())
V = svalBuilder.makeZeroVal(T);
else if (T->isStructureOrClassType() || T->isArrayType()) {
// Set the default value to a zero constant when it is a structure
// or array. The type doesn't really matter.
V = svalBuilder.makeZeroVal(Ctx.IntTy);
}
else {
// We can't represent values of this type, but we still need to set a value
// to record that the region has been initialized.
// If this assertion ever fires, a new case should be added above -- we
// should know how to default-initialize any value we can symbolicate.
assert(!SymbolManager::canSymbolicate(T) && "This type is representable");
V = UnknownVal();
}
return B.addBinding(R, BindingKey::Default, V);
}
RegionBindingsRef
RegionStoreManager::bindArray(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal Init) {
const ArrayType *AT = cast<ArrayType>(Ctx.getCanonicalType(R->getValueType()));
QualType ElementTy = AT->getElementType();
Optional<uint64_t> Size;
if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(AT))
Size = CAT->getSize().getZExtValue();
// Check if the init expr is a string literal.
if (Optional<loc::MemRegionVal> MRV = Init.getAs<loc::MemRegionVal>()) {
const StringRegion *S = cast<StringRegion>(MRV->getRegion());
// Treat the string as a lazy compound value.
StoreRef store(B.asStore(), *this);
nonloc::LazyCompoundVal LCV = svalBuilder.makeLazyCompoundVal(store, S)
.castAs<nonloc::LazyCompoundVal>();
return bindAggregate(B, R, LCV);
}
// Handle lazy compound values.
if (Init.getAs<nonloc::LazyCompoundVal>())
return bindAggregate(B, R, Init);
if (Init.isUnknown())
return bindAggregate(B, R, UnknownVal());
// Remaining case: explicit compound values.
const nonloc::CompoundVal& CV = Init.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
uint64_t i = 0;
RegionBindingsRef NewB(B);
for (; Size.hasValue() ? i < Size.getValue() : true; ++i, ++VI) {
// The init list might be shorter than the array length.
if (VI == VE)
break;
const NonLoc &Idx = svalBuilder.makeArrayIndex(i);
const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx);
if (ElementTy->isStructureOrClassType())
NewB = bindStruct(NewB, ER, *VI);
else if (ElementTy->isArrayType())
NewB = bindArray(NewB, ER, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
}
// If the init list is shorter than the array length, set the
// array default value.
if (Size.hasValue() && i < Size.getValue())
NewB = setImplicitDefaultValue(NewB, R, ElementTy);
return NewB;
}
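
A hypothetical example of the short-initializer path, where trailing elements fall back to the implicit default:

// Hypothetical analyzed code: initializer list shorter than the array.
int sumTail() {
  int a[5] = {1, 2};  // elements 0 and 1 are bound explicitly; the loop stops
                      // at VI == VE and setImplicitDefaultValue gives the rest
                      // of 'a' a zero default binding
  return a[3] + a[4]; // both reads resolve to 0 through the default binding
}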
RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V) {
QualType T = R->getValueType();
assert(T->isVectorType());
const VectorType *VT = T->getAs<VectorType>(); // Use getAs for typedefs.
// Handle lazy compound values and symbolic values.
if (V.getAs<nonloc::LazyCompoundVal>() || V.getAs<nonloc::SymbolVal>())
return bindAggregate(B, R, V);
// We may accidentally get a non-CompoundVal due to imprecise cast logic or
// because we are binding a symbolic struct value. Kill the field values, and
// if the value is symbolic, bind it as a "default" binding.
if (!V.getAs<nonloc::CompoundVal>()) {
return bindAggregate(B, R, UnknownVal());
}
QualType ElemType = VT->getElementType();
nonloc::CompoundVal CV = V.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
unsigned index = 0, numElements = VT->getNumElements();
RegionBindingsRef NewB(B);
for (; index != numElements; ++index) {
if (VI == VE)
break;
NonLoc Idx = svalBuilder.makeArrayIndex(index);
const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx);
if (ElemType->isArrayType())
NewB = bindArray(NewB, ER, *VI);
else if (ElemType->isStructureOrClassType())
NewB = bindStruct(NewB, ER, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
}
return NewB;
}
Optional<RegionBindingsRef>
RegionStoreManager::tryBindSmallStruct(RegionBindingsConstRef B,
const TypedValueRegion *R,
const RecordDecl *RD,
nonloc::LazyCompoundVal LCV) {
FieldVector Fields;
if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
if (Class->getNumBases() != 0 || Class->getNumVBases() != 0)
return None;
for (const auto *FD : RD->fields()) {
if (FD->isUnnamedBitfield())
continue;
// If there are too many fields, or if any of the fields are aggregates,
// just use the LCV as a default binding.
if (Fields.size() == SmallStructLimit)
return None;
QualType Ty = FD->getType();
if (!(Ty->isScalarType() || Ty->isReferenceType()))
return None;
Fields.push_back(FD);
}
RegionBindingsRef NewB = B;
for (FieldVector::iterator I = Fields.begin(), E = Fields.end(); I != E; ++I) {
const FieldRegion *SourceFR = MRMgr.getFieldRegion(*I, LCV.getRegion());
SVal V = getBindingForField(getRegionBindings(LCV.getStore()), SourceFR);
const FieldRegion *DestFR = MRMgr.getFieldRegion(*I, R);
NewB = bind(NewB, loc::MemRegionVal(DestFR), V);
}
return NewB;
}
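
A sketch of the small-struct fast path under the assumption that the hypothetical struct below stays within SmallStructLimit:

// Hypothetical analyzed code: copying a small, all-scalar struct.
struct Pair { int first, second; };
void copyPair(Pair &dst, const Pair &src) {
  dst = src; // instead of one default LazyCompoundVal binding on 'dst',
             // tryBindSmallStruct binds dst.first and dst.second directly,
             // keeping later field reads precise
}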
RegionBindingsRef RegionStoreManager::bindStruct(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V) {
if (!Features.supportsFields())
return B;
QualType T = R->getValueType();
assert(T->isStructureOrClassType());
const RecordType* RT = T->getAs<RecordType>();
const RecordDecl *RD = RT->getDecl();
if (!RD->isCompleteDefinition())
return B;
// Handle lazy compound values and symbolic values.
if (Optional<nonloc::LazyCompoundVal> LCV =
V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<RegionBindingsRef> NewB = tryBindSmallStruct(B, R, RD, *LCV))
return *NewB;
return bindAggregate(B, R, V);
}
if (V.getAs<nonloc::SymbolVal>())
return bindAggregate(B, R, V);
// We may accidentally get a non-CompoundVal due to imprecise cast logic or
// because we are binding a symbolic struct value. Kill the field values, and
// if the value is symbolic, bind it as a "default" binding.
if (V.isUnknown() || !V.getAs<nonloc::CompoundVal>())
return bindAggregate(B, R, UnknownVal());
const nonloc::CompoundVal& CV = V.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
RecordDecl::field_iterator FI, FE;
RegionBindingsRef NewB(B);
for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) {
if (VI == VE)
break;
// Skip any unnamed bitfields to stay in sync with the initializers.
if (FI->isUnnamedBitfield())
continue;
QualType FTy = FI->getType();
const FieldRegion* FR = MRMgr.getFieldRegion(*FI, R);
if (FTy->isArrayType())
NewB = bindArray(NewB, FR, *VI);
else if (FTy->isStructureOrClassType())
NewB = bindStruct(NewB, FR, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(FR), *VI);
++VI;
}
// There may be fewer values in the initializer list than fields in the struct.
if (FI != FE) {
NewB = NewB.addBinding(R, BindingKey::Default,
svalBuilder.makeIntVal(0, false));
}
return NewB;
}
RegionBindingsRef
RegionStoreManager::bindAggregate(RegionBindingsConstRef B,
const TypedRegion *R,
SVal Val) {
// Remove the old bindings, using 'R' as the root of all regions
// we will invalidate. Then add the new binding.
return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val);
}
//===----------------------------------------------------------------------===//
// State pruning.
//===----------------------------------------------------------------------===//
namespace {
class removeDeadBindingsWorker :
public ClusterAnalysis<removeDeadBindingsWorker> {
SmallVector<const SymbolicRegion*, 12> Postponed;
SymbolReaper &SymReaper;
const StackFrameContext *CurrentLCtx;
public:
removeDeadBindingsWorker(RegionStoreManager &rm,
ProgramStateManager &stateMgr,
RegionBindingsRef b, SymbolReaper &symReaper,
const StackFrameContext *LCtx)
: ClusterAnalysis<removeDeadBindingsWorker>(rm, stateMgr, b),
SymReaper(symReaper), CurrentLCtx(LCtx) {}
// Called by ClusterAnalysis.
void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C);
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
using ClusterAnalysis<removeDeadBindingsWorker>::VisitCluster;
using ClusterAnalysis::AddToWorkList;
bool AddToWorkList(const MemRegion *R);
bool UpdatePostponed();
void VisitBinding(SVal V);
};
}
bool removeDeadBindingsWorker::AddToWorkList(const MemRegion *R) {
const MemRegion *BaseR = R->getBaseRegion();
return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}
void removeDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR,
const ClusterBindings &C) {
if (const VarRegion *VR = dyn_cast<VarRegion>(baseR)) {
if (SymReaper.isLive(VR))
AddToWorkList(baseR, &C);
return;
}
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR)) {
if (SymReaper.isLive(SR->getSymbol()))
AddToWorkList(SR, &C);
else
Postponed.push_back(SR);
return;
}
if (isa<NonStaticGlobalSpaceRegion>(baseR)) {
AddToWorkList(baseR, &C);
return;
}
// CXXThisRegion in the current or parent location context is live.
if (const CXXThisRegion *TR = dyn_cast<CXXThisRegion>(baseR)) {
const StackArgumentsSpaceRegion *StackReg =
cast<StackArgumentsSpaceRegion>(TR->getSuperRegion());
const StackFrameContext *RegCtx = StackReg->getStackFrame();
if (CurrentLCtx &&
(RegCtx == CurrentLCtx || RegCtx->isParentOf(CurrentLCtx)))
AddToWorkList(TR, &C);
}
}
void removeDeadBindingsWorker::VisitCluster(const MemRegion *baseR,
const ClusterBindings *C) {
if (!C)
return;
// Mark the symbol for any SymbolicRegion with live bindings as live itself.
// This means we should continue to track that symbol.
if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(baseR))
SymReaper.markLive(SymR->getSymbol());
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I) {
// Element index of a binding key is live.
SymReaper.markElementIndicesLive(I.getKey().getRegion());
VisitBinding(I.getData());
}
}
void removeDeadBindingsWorker::VisitBinding(SVal V) {
// Is it a LazyCompoundVal? All referenced regions are live as well.
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);
for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(),
E = Vals.end();
I != E; ++I)
VisitBinding(*I);
return;
}
// If V is a region, then add it to the worklist.
if (const MemRegion *R = V.getAsRegion()) {
AddToWorkList(R);
SymReaper.markLive(R);
// All regions captured by a block are also live.
if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) {
BlockDataRegion::referenced_vars_iterator I = BR->referenced_vars_begin(),
E = BR->referenced_vars_end();
for ( ; I != E; ++I)
AddToWorkList(I.getCapturedRegion());
}
}
// Update the set of live symbols.
for (SymExpr::symbol_iterator SI = V.symbol_begin(), SE = V.symbol_end();
SI!=SE; ++SI)
SymReaper.markLive(*SI);
}
bool removeDeadBindingsWorker::UpdatePostponed() {
// See if any postponed SymbolicRegions are actually live now, after
// having done a scan.
bool changed = false;
for (SmallVectorImpl<const SymbolicRegion*>::iterator
I = Postponed.begin(), E = Postponed.end() ; I != E ; ++I) {
if (const SymbolicRegion *SR = *I) {
if (SymReaper.isLive(SR->getSymbol())) {
changed |= AddToWorkList(SR);
*I = nullptr;
}
}
}
return changed;
}
StoreRef RegionStoreManager::removeDeadBindings(Store store,
const StackFrameContext *LCtx,
SymbolReaper& SymReaper) {
RegionBindingsRef B = getRegionBindings(store);
removeDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx);
W.GenerateClusters();
// Enqueue the region roots onto the worklist.
for (SymbolReaper::region_iterator I = SymReaper.region_begin(),
E = SymReaper.region_end(); I != E; ++I) {
W.AddToWorkList(*I);
}
do W.RunWorkList(); while (W.UpdatePostponed());
// We have now scanned the store, marking reachable regions and symbols
// as live. We now remove all the regions that are dead from the store
// as well as update DSymbols with the set of symbols that are now dead.
for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) {
const MemRegion *Base = I.getKey();
// If the cluster has been visited, we know the region has been marked.
if (W.isVisited(Base))
continue;
// Remove the dead entry.
B = B.remove(Base);
if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(Base))
SymReaper.maybeDead(SymR->getSymbol());
// Mark all non-live symbols that this binding references as dead.
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
SVal X = CI.getData();
SymExpr::symbol_iterator SI = X.symbol_begin(), SE = X.symbol_end();
for (; SI != SE; ++SI)
SymReaper.maybeDead(*SI);
}
}
return StoreRef(B.asStore(), *this);
}
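
A hedged sketch of a binding the pass removes; the code and liveness outcome below are hypothetical:

// Hypothetical analyzed code: a binding that dies before the frame exits.
void h(int *out) {
  int tmp = 9;      // a cluster is created for 'tmp' when it is bound
  int result = tmp; // last use of 'tmp': SymReaper then reports it dead, the
                    // worker never visits its cluster, and the loop above
                    // removes its binding from the store
  *out = result;
}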
//===----------------------------------------------------------------------===//
// Utility methods.
//===----------------------------------------------------------------------===//
void RegionStoreManager::print(Store store, raw_ostream &OS,
const char* nl, const char *sep) {
RegionBindingsRef B = getRegionBindings(store);
OS << "Store (direct and default bindings), "
<< B.asStore()
<< " :" << nl;
B.dump(OS, nl);
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang (revision 322854)
+++ head/contrib/llvm/tools/clang (revision 322855)
Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist:r322737-322850
Index: head/contrib/llvm/tools/lld
===================================================================
--- head/contrib/llvm/tools/lld (revision 322854)
+++ head/contrib/llvm/tools/lld (revision 322855)
Property changes on: head/contrib/llvm/tools/lld
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lld/dist:r322737-322850
Index: head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
===================================================================
--- head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp (revision 322854)
+++ head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp (revision 322855)
@@ -1,137 +1,136 @@
//===-- RegisterContextLinux_i386.cpp --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#include "RegisterContextLinux_i386.h"
#include "RegisterContextPOSIX_x86.h"
using namespace lldb_private;
using namespace lldb;
struct GPR {
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
uint32_t esi;
uint32_t edi;
uint32_t ebp;
uint32_t eax;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
uint32_t orig_eax;
uint32_t eip;
uint32_t cs;
uint32_t eflags;
uint32_t esp;
uint32_t ss;
};
struct FPR_i386 {
uint16_t fctrl; // FPU Control Word (fcw)
uint16_t fstat; // FPU Status Word (fsw)
- uint8_t ftag; // FPU Tag Word (ftw)
- uint8_t reserved_1; // Reserved
+ uint16_t ftag; // FPU Tag Word (ftw)
uint16_t fop; // Last Instruction Opcode (fop)
union {
struct {
uint64_t fip; // Instruction Pointer
uint64_t fdp; // Data Pointer
} x86_64;
struct {
uint32_t fioff; // FPU IP Offset (fip)
uint32_t fiseg; // FPU IP Selector (fcs)
uint32_t fooff; // FPU Operand Pointer Offset (foo)
uint32_t foseg; // FPU Operand Pointer Selector (fos)
} i386_; // Added _ at the end to avoid an error with gcc defining i386 in
// some cases
} ptr;
uint32_t mxcsr; // MXCSR Register State
uint32_t mxcsrmask; // MXCSR Mask
MMSReg stmm[8]; // 8*16 bytes for each FP-reg = 128 bytes
XMMReg xmm[8]; // 8*16 bytes for each XMM-reg = 128 bytes
uint32_t padding[56];
};
struct UserArea {
GPR regs; // General purpose registers.
int32_t fpvalid; // True if FPU is being used.
FPR_i386 i387; // FPU registers.
uint32_t tsize; // Text segment size.
uint32_t dsize; // Data segment size.
uint32_t ssize; // Stack segment size.
uint32_t start_code; // VM address of text.
uint32_t start_stack; // VM address of stack bottom (top in rsp).
int32_t signal; // Signal causing core dump.
int32_t reserved; // Unused.
uint32_t ar0; // Location of GPR's.
uint32_t fpstate; // Location of FPR's. Should be a FXSTATE *, but this
// has to be 32-bits even on 64-bit systems.
uint32_t magic; // Identifier for core dumps.
char u_comm[32]; // Command causing core dump.
uint32_t u_debugreg[8]; // Debug registers (DR0 - DR7).
};
#define DR_SIZE sizeof(((UserArea *)NULL)->u_debugreg[0])
#define DR_0_OFFSET 0xFC
#define DR_OFFSET(reg_index) (DR_0_OFFSET + (reg_index * 4))
#define FPR_SIZE(reg) sizeof(((FPR_i386 *)NULL)->reg)
//---------------------------------------------------------------------------
// Include RegisterInfos_i386 to declare our g_register_infos_i386 structure.
//---------------------------------------------------------------------------
#define DECLARE_REGISTER_INFOS_I386_STRUCT
#include "RegisterInfos_i386.h"
#undef DECLARE_REGISTER_INFOS_I386_STRUCT
RegisterContextLinux_i386::RegisterContextLinux_i386(
const ArchSpec &target_arch)
: RegisterInfoInterface(target_arch) {
RegisterInfo orig_ax = {"orig_eax",
NULL,
sizeof(((GPR *)NULL)->orig_eax),
(LLVM_EXTENSION offsetof(GPR, orig_eax)),
eEncodingUint,
eFormatHex,
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
nullptr,
0};
d_register_infos.push_back(orig_ax);
}
size_t RegisterContextLinux_i386::GetGPRSize() const { return sizeof(GPR); }
const RegisterInfo *RegisterContextLinux_i386::GetRegisterInfo() const {
switch (m_target_arch.GetMachine()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return g_register_infos_i386;
default:
assert(false && "Unhandled target architecture.");
return NULL;
}
}
uint32_t RegisterContextLinux_i386::GetRegisterCount() const {
return static_cast<uint32_t>(sizeof(g_register_infos_i386) /
sizeof(g_register_infos_i386[0]));
}
uint32_t RegisterContextLinux_i386::GetUserRegisterCount() const {
return static_cast<uint32_t>(k_num_user_registers_i386);
}
const std::vector<lldb_private::RegisterInfo> *
RegisterContextLinux_i386::GetDynamicRegisterInfoP() const {
return &d_register_infos;
}
Index: head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h
===================================================================
--- head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h (revision 322854)
+++ head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h (revision 322855)
@@ -1,359 +1,358 @@
//===-- RegisterContext_x86.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef liblldb_RegisterContext_x86_H_
#define liblldb_RegisterContext_x86_H_
#include <cstddef>
#include <cstdint>
#include "llvm/Support/Compiler.h"
//---------------------------------------------------------------------------
// i386 ehframe, dwarf regnums
//---------------------------------------------------------------------------
// Register numbers seen in eh_frame (eRegisterKindEHFrame) on i386 systems
// (non-Darwin)
//
enum {
ehframe_eax_i386 = 0,
ehframe_ecx_i386,
ehframe_edx_i386,
ehframe_ebx_i386,
// on Darwin esp & ebp are reversed in the eh_frame section for i386 (versus
// dwarf's reg numbering).
// To be specific:
// i386+darwin eh_frame: 4 is ebp, 5 is esp
// i386+everyone else eh_frame: 4 is esp, 5 is ebp
// i386 dwarf: 4 is esp, 5 is ebp
// lldb will get the darwin-specific eh_frame reg numberings from debugserver,
// or the ABI, so we only encode the generally correct 4 == esp, 5 == ebp
// numbers in this generic header.
ehframe_esp_i386,
ehframe_ebp_i386,
ehframe_esi_i386,
ehframe_edi_i386,
ehframe_eip_i386,
ehframe_eflags_i386,
ehframe_st0_i386 = 12,
ehframe_st1_i386,
ehframe_st2_i386,
ehframe_st3_i386,
ehframe_st4_i386,
ehframe_st5_i386,
ehframe_st6_i386,
ehframe_st7_i386,
ehframe_xmm0_i386 = 21,
ehframe_xmm1_i386,
ehframe_xmm2_i386,
ehframe_xmm3_i386,
ehframe_xmm4_i386,
ehframe_xmm5_i386,
ehframe_xmm6_i386,
ehframe_xmm7_i386,
ehframe_mm0_i386 = 29,
ehframe_mm1_i386,
ehframe_mm2_i386,
ehframe_mm3_i386,
ehframe_mm4_i386,
ehframe_mm5_i386,
ehframe_mm6_i386,
ehframe_mm7_i386,
};
// DWARF register numbers (eRegisterKindDWARF)
// Intel's x86 or IA-32
enum {
// General Purpose Registers.
dwarf_eax_i386 = 0,
dwarf_ecx_i386,
dwarf_edx_i386,
dwarf_ebx_i386,
dwarf_esp_i386,
dwarf_ebp_i386,
dwarf_esi_i386,
dwarf_edi_i386,
dwarf_eip_i386,
dwarf_eflags_i386,
// Floating Point Registers
dwarf_st0_i386 = 11,
dwarf_st1_i386,
dwarf_st2_i386,
dwarf_st3_i386,
dwarf_st4_i386,
dwarf_st5_i386,
dwarf_st6_i386,
dwarf_st7_i386,
// SSE Registers
dwarf_xmm0_i386 = 21,
dwarf_xmm1_i386,
dwarf_xmm2_i386,
dwarf_xmm3_i386,
dwarf_xmm4_i386,
dwarf_xmm5_i386,
dwarf_xmm6_i386,
dwarf_xmm7_i386,
// MMX Registers
dwarf_mm0_i386 = 29,
dwarf_mm1_i386,
dwarf_mm2_i386,
dwarf_mm3_i386,
dwarf_mm4_i386,
dwarf_mm5_i386,
dwarf_mm6_i386,
dwarf_mm7_i386,
dwarf_fctrl_i386 = 37, // x87 control word
dwarf_fstat_i386 = 38, // x87 status word
dwarf_mxcsr_i386 = 39,
dwarf_es_i386 = 40,
dwarf_cs_i386 = 41,
dwarf_ss_i386 = 42,
dwarf_ds_i386 = 43,
dwarf_fs_i386 = 44,
dwarf_gs_i386 = 45,
// I believe the ymm registers use the dwarf_xmm%_i386 register numbers and
// then differentiate based on size of the register.
dwarf_bnd0_i386 = 101,
dwarf_bnd1_i386,
dwarf_bnd2_i386,
dwarf_bnd3_i386,
};
//---------------------------------------------------------------------------
// AMD x86_64, AMD64, Intel EM64T, or Intel 64 ehframe, dwarf regnums
//---------------------------------------------------------------------------
// EHFrame and DWARF Register numbers (eRegisterKindEHFrame &
// eRegisterKindDWARF)
// This is the spec I used (as opposed to x86-64-abi-0.99.pdf):
// http://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
enum {
// GP Registers
dwarf_rax_x86_64 = 0,
dwarf_rdx_x86_64,
dwarf_rcx_x86_64,
dwarf_rbx_x86_64,
dwarf_rsi_x86_64,
dwarf_rdi_x86_64,
dwarf_rbp_x86_64,
dwarf_rsp_x86_64,
// Extended GP Registers
dwarf_r8_x86_64 = 8,
dwarf_r9_x86_64,
dwarf_r10_x86_64,
dwarf_r11_x86_64,
dwarf_r12_x86_64,
dwarf_r13_x86_64,
dwarf_r14_x86_64,
dwarf_r15_x86_64,
// Return Address (RA) mapped to RIP
dwarf_rip_x86_64 = 16,
// SSE Vector Registers
dwarf_xmm0_x86_64 = 17,
dwarf_xmm1_x86_64,
dwarf_xmm2_x86_64,
dwarf_xmm3_x86_64,
dwarf_xmm4_x86_64,
dwarf_xmm5_x86_64,
dwarf_xmm6_x86_64,
dwarf_xmm7_x86_64,
dwarf_xmm8_x86_64,
dwarf_xmm9_x86_64,
dwarf_xmm10_x86_64,
dwarf_xmm11_x86_64,
dwarf_xmm12_x86_64,
dwarf_xmm13_x86_64,
dwarf_xmm14_x86_64,
dwarf_xmm15_x86_64,
// Floating Point Registers
dwarf_st0_x86_64 = 33,
dwarf_st1_x86_64,
dwarf_st2_x86_64,
dwarf_st3_x86_64,
dwarf_st4_x86_64,
dwarf_st5_x86_64,
dwarf_st6_x86_64,
dwarf_st7_x86_64,
// MMX Registers
dwarf_mm0_x86_64 = 41,
dwarf_mm1_x86_64,
dwarf_mm2_x86_64,
dwarf_mm3_x86_64,
dwarf_mm4_x86_64,
dwarf_mm5_x86_64,
dwarf_mm6_x86_64,
dwarf_mm7_x86_64,
// Control and Status Flags Register
dwarf_rflags_x86_64 = 49,
// selector registers
dwarf_es_x86_64 = 50,
dwarf_cs_x86_64,
dwarf_ss_x86_64,
dwarf_ds_x86_64,
dwarf_fs_x86_64,
dwarf_gs_x86_64,
// Floating point control registers
dwarf_mxcsr_x86_64 = 64, // Media Control and Status
dwarf_fctrl_x86_64, // x87 control word
dwarf_fstat_x86_64, // x87 status word
// Upper Vector Registers
dwarf_ymm0h_x86_64 = 67,
dwarf_ymm1h_x86_64,
dwarf_ymm2h_x86_64,
dwarf_ymm3h_x86_64,
dwarf_ymm4h_x86_64,
dwarf_ymm5h_x86_64,
dwarf_ymm6h_x86_64,
dwarf_ymm7h_x86_64,
dwarf_ymm8h_x86_64,
dwarf_ymm9h_x86_64,
dwarf_ymm10h_x86_64,
dwarf_ymm11h_x86_64,
dwarf_ymm12h_x86_64,
dwarf_ymm13h_x86_64,
dwarf_ymm14h_x86_64,
dwarf_ymm15h_x86_64,
// MPX registers
dwarf_bnd0_x86_64 = 126,
dwarf_bnd1_x86_64,
dwarf_bnd2_x86_64,
dwarf_bnd3_x86_64,
// AVX2 Vector Mask Registers
// dwarf_k0_x86_64 = 118,
// dwarf_k1_x86_64,
// dwarf_k2_x86_64,
// dwarf_k3_x86_64,
// dwarf_k4_x86_64,
// dwarf_k5_x86_64,
// dwarf_k6_x86_64,
// dwarf_k7_x86_64,
};
//---------------------------------------------------------------------------
// Generic floating-point registers
//---------------------------------------------------------------------------
struct MMSReg {
uint8_t bytes[10];
uint8_t pad[6];
};
struct XMMReg {
uint8_t bytes[16]; // 128-bits for each XMM register
};
// i387_fxsave_struct
struct FXSAVE {
uint16_t fctrl; // FPU Control Word (fcw)
uint16_t fstat; // FPU Status Word (fsw)
- uint8_t ftag; // FPU Tag Word (ftw)
- uint8_t reserved_1; // Reserved
+ uint16_t ftag; // FPU Tag Word (ftw)
uint16_t fop; // Last Instruction Opcode (fop)
union {
struct {
uint64_t fip; // Instruction Pointer
uint64_t fdp; // Data Pointer
} x86_64;
struct {
uint32_t fioff; // FPU IP Offset (fip)
uint32_t fiseg; // FPU IP Selector (fcs)
uint32_t fooff; // FPU Operand Pointer Offset (foo)
uint32_t foseg; // FPU Operand Pointer Selector (fos)
} i386_; // Added _ at the end to avoid an error with gcc defining i386 in
// some cases
} ptr;
uint32_t mxcsr; // MXCSR Register State
uint32_t mxcsrmask; // MXCSR Mask
MMSReg stmm[8]; // 8*16 bytes for each FP-reg = 128 bytes
XMMReg xmm[16]; // 16*16 bytes for each XMM-reg = 256 bytes
uint8_t padding1[48];
uint64_t xcr0;
uint8_t padding2[40];
};
//---------------------------------------------------------------------------
// Extended floating-point registers
//---------------------------------------------------------------------------
struct YMMHReg {
uint8_t bytes[16]; // 16 * 8 bits for the high bytes of each YMM register
};
struct YMMReg {
uint8_t bytes[32]; // 32 * 8 bits (256 bits) for each YMM register
};
struct YMM {
YMMReg ymm[16]; // assembled from ymmh and xmm registers
};
struct MPXReg {
uint8_t bytes[16]; // MPX 128 bit bound registers
};
struct MPXCsr {
uint8_t bytes[8]; // MPX 64 bit bndcfgu and bndstatus registers (collectively
// BNDCSR state)
};
struct MPX {
MPXReg mpxr[4];
MPXCsr mpxc[2];
};
LLVM_PACKED_START
struct XSAVE_HDR {
uint64_t xstate_bv; // OS enabled xstate mask to determine the extended states
// supported by the processor
uint64_t xcomp_bv; // Mask to indicate the format of the XSAVE area and of
// the XRSTOR instruction
uint64_t reserved1[1];
uint64_t reserved2[5];
};
LLVM_PACKED_END
// x86 extensions to FXSAVE (i.e. for AVX and MPX processors)
LLVM_PACKED_START
struct LLVM_ALIGNAS(64) XSAVE {
FXSAVE i387; // floating point registers typical in i387_fxsave_struct
XSAVE_HDR header; // The xsave_hdr_struct can be used to determine if the
// following extensions are usable
YMMHReg ymmh[16]; // High 16 bytes of each of 16 YMM registers (the low bytes
// are in FXSAVE.xmm for compatibility with SSE)
uint64_t reserved3[16];
MPXReg mpxr[4]; // MPX BNDREG state, containing 128-bit bound registers
MPXCsr mpxc[2]; // MPX BNDCSR state, containing 64-bit BNDCFGU and
// BNDSTATUS registers
};
LLVM_PACKED_END
// Floating-point registers
struct FPR {
// Thread state for the floating-point unit of the processor read by ptrace.
union XSTATE {
FXSAVE fxsave; // Generic floating-point registers.
XSAVE xsave; // x86 extended processor state.
} xstate;
};
//---------------------------------------------------------------------------
// ptrace PTRACE_GETREGSET, PTRACE_SETREGSET structure
//---------------------------------------------------------------------------
struct IOVEC {
void *iov_base; // pointer to XSAVE
size_t iov_len; // sizeof(XSAVE)
};
#endif
Index: head/contrib/llvm/tools/lldb
===================================================================
--- head/contrib/llvm/tools/lldb (revision 322854)
+++ head/contrib/llvm/tools/lldb (revision 322855)
Property changes on: head/contrib/llvm/tools/lldb
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lldb/dist:r322737-322850
Index: head/contrib/llvm
===================================================================
--- head/contrib/llvm (revision 322854)
+++ head/contrib/llvm (revision 322855)
Property changes on: head/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm/dist:r322737-322850
Index: head/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- head/lib/clang/include/clang/Basic/Version.inc (revision 322854)
+++ head/lib/clang/include/clang/Basic/Version.inc (revision 322855)
@@ -1,11 +1,11 @@
/* $FreeBSD$ */
#define CLANG_VERSION 5.0.0
#define CLANG_VERSION_STRING "5.0.0"
#define CLANG_VERSION_MAJOR 5
#define CLANG_VERSION_MINOR 0
#define CLANG_VERSION_PATCHLEVEL 0
#define CLANG_VENDOR "FreeBSD "
-#define SVN_REVISION "311219"
+#define SVN_REVISION "311606"
Index: head/lib/clang/include/lld/Config/Version.inc
===================================================================
--- head/lib/clang/include/lld/Config/Version.inc (revision 322854)
+++ head/lib/clang/include/lld/Config/Version.inc (revision 322855)
@@ -1,8 +1,8 @@
// $FreeBSD$
#define LLD_VERSION 5.0.0
#define LLD_VERSION_STRING "5.0.0"
#define LLD_VERSION_MAJOR 5
#define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "311219"
+#define LLD_REVISION_STRING "311606"
#define LLD_REPOSITORY_STRING "FreeBSD"
Index: head/lib/clang/include/llvm/Support/VCSRevision.h
===================================================================
--- head/lib/clang/include/llvm/Support/VCSRevision.h (revision 322854)
+++ head/lib/clang/include/llvm/Support/VCSRevision.h (revision 322855)
@@ -1,2 +1,2 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "svn-r311219"
+#define LLVM_REVISION "svn-r311606"
